use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::cell::Cell;
use std::collections::{HashMap, HashSet, VecDeque};
use crate::ssa::types::{SsaFunction, SsaNameId};
use crate::types::{CfgInfo, RefType, VarRef};
use crate::Language;
use crate::TldrError;
thread_local! {
    // Per-thread switch. While it is `true`, the regex-based detectors in this
    // file (`detect_sources`, `detect_sinks`, `detect_sanitizer`) return nothing.
    static AST_ONLY_TEST_MODE: Cell<bool> = const { Cell::new(false) };
}

/// Scope guard that turns the thread-local AST-only test mode on.
///
/// [`AstOnlyTestModeGuard::enter`] sets the flag to `true` and remembers the
/// value it had before; dropping the guard writes that remembered value back.
/// Nested guards therefore unwind correctly as long as they are dropped in
/// reverse order of creation.
#[must_use = "the mode is reset as soon as the guard is dropped; bind it to a variable"]
#[derive(Debug)]
pub struct AstOnlyTestModeGuard {
    /// Flag value observed when the guard was created.
    previous: bool,
}

impl AstOnlyTestModeGuard {
    /// Enables AST-only test mode on the current thread and returns the guard
    /// that restores the prior state on drop.
    pub fn enter() -> Self {
        let previous = AST_ONLY_TEST_MODE.with(|flag| flag.replace(true));
        Self { previous }
    }
}

impl Drop for AstOnlyTestModeGuard {
    /// Restores the flag value captured by [`AstOnlyTestModeGuard::enter`].
    fn drop(&mut self) {
        let previous = self.previous;
        AST_ONLY_TEST_MODE.with(|flag| flag.set(previous));
    }
}
/// Hashable key identifying a tracked value, either by SSA name or by raw text.
///
/// NOTE(review): the code that builds and consumes these keys is outside this
/// chunk; presumably `Versioned` is used when SSA information is available and
/// `Raw` is the fallback on the plain variable name — confirm against the users.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
enum TaintKey {
/// Key backed by an SSA name id.
Versioned(SsaNameId),
/// Key backed by a plain string.
Raw(String),
}
// Iteration cap for the taint analysis.
// NOTE(review): the loop that reads this constant is not visible in this part
// of the file; presumably it bounds the propagation fixpoint — confirm.
const MAX_TAINT_ITERATIONS: usize = 1000;
/// Category of a taint source (where untrusted data enters).
///
/// Serialized with serde using `snake_case` variant names (e.g. `user_input`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaintSourceType {
/// Interactive/user-provided input.
UserInput,
/// Data read from standard input.
Stdin,
/// HTTP request parameters (the pattern tables also file headers/cookies here).
HttpParam,
/// HTTP request body.
HttpBody,
/// Environment variable.
EnvVar,
/// Data read from a file.
FileRead,
}
/// Category of a taint sink (an operation that is dangerous with untrusted data).
///
/// Serialized with serde using `snake_case` variant names (e.g. `sql_query`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaintSinkType {
/// SQL statement execution.
SqlQuery,
/// Dynamic code evaluation (`eval`-style).
CodeEval,
/// Dynamic code execution (`exec`-style).
CodeExec,
/// Dynamic code compilation (`compile`-style).
CodeCompile,
/// Shell / subprocess execution.
ShellExec,
/// Writing data out (file or similar output).
FileWrite,
/// HTML output.
HtmlOutput,
/// Opening or building a filesystem path.
FileOpen,
/// Outgoing HTTP request.
HttpRequest,
/// Deserialization of data.
Deserialize,
/// HTTP redirect.
OpenRedirect,
}
/// Category of a sanitizer recognised by the pattern tables.
///
/// Serialized with serde using `snake_case` variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SanitizerType {
/// Numeric conversion (e.g. `int`, `parseInt` in the pattern tables).
Numeric,
/// Shell quoting/escaping.
Shell,
/// HTML (or URL) escaping.
Html,
}
/// A detected taint source: a variable that receives untrusted data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintSource {
/// Name of the variable considered tainted by this source.
pub var: String,
/// Source line the detection was reported for.
pub line: u32,
/// Kind of source that matched.
pub source_type: TaintSourceType,
/// Text of the matching statement; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub statement: Option<String>,
}
/// A detected taint sink: a variable that reaches a sensitive operation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintSink {
/// Name of the variable passed to the sink.
pub var: String,
/// Source line the detection was reported for.
pub line: u32,
/// Kind of sink that matched.
pub sink_type: TaintSinkType,
/// Whether `var` is tainted at the sink. `detect_sinks` creates sinks with
/// `false`; `TaintInfo::get_vulnerabilities` returns the sinks where it is `true`.
pub tainted: bool,
/// Text of the matching statement; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub statement: Option<String>,
}
/// A source-to-sink flow.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintFlow {
/// Where the tainted data originates.
pub source: TaintSource,
/// Where the tainted data ends up.
pub sink: TaintSink,
/// Path connecting source and sink.
/// NOTE(review): presumably a sequence of CFG block ids — confirm against the
/// code that fills it (not visible here).
pub path: Vec<usize>,
}
/// Result of taint analysis for one function.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TaintInfo {
/// Name of the analysed function. Serialized as `function`; `function_name`
/// is also accepted when deserializing.
#[serde(rename = "function", alias = "function_name")]
pub function_name: String,
/// Tainted variable names keyed by block id (see `is_tainted`).
pub tainted_vars: HashMap<usize, HashSet<String>>,
/// Detected taint sources.
pub sources: Vec<TaintSource>,
/// Detected taint sinks.
pub sinks: Vec<TaintSink>,
/// Source-to-sink flows.
pub flows: Vec<TaintFlow>,
/// Names of variables recorded as sanitized.
pub sanitized_vars: HashSet<String>,
/// Optional convergence note; defaults to `None` when absent from the input
/// and is omitted from the output when `None`.
#[serde(default = "default_convergence")]
#[serde(skip_serializing_if = "Option::is_none")]
pub convergence: Option<String>,
}
/// Serde default for the `convergence` field of `TaintInfo`: no value.
fn default_convergence() -> Option<String> {
    Option::default()
}
impl TaintInfo {
pub fn new(function_name: impl Into<String>) -> Self {
Self {
function_name: function_name.into(),
tainted_vars: HashMap::new(),
sources: Vec::new(),
sinks: Vec::new(),
flows: Vec::new(),
sanitized_vars: HashSet::new(),
convergence: None,
}
}
pub fn is_tainted(&self, block_id: usize, var: &str) -> bool {
self.tainted_vars
.get(&block_id)
.map(|vars| vars.contains(var))
.unwrap_or(false)
}
pub fn get_vulnerabilities(&self) -> Vec<&TaintSink> {
self.sinks.iter().filter(|s| s.tainted).collect()
}
}
/// Builds the predecessor map of a CFG: block id -> ids of blocks with an edge
/// into it, in edge order.
///
/// Every block in `cfg.blocks` gets an entry (possibly empty). Edge targets
/// that are not listed as blocks still get an entry.
pub fn build_predecessors(cfg: &CfgInfo) -> HashMap<usize, Vec<usize>> {
    let mut incoming: HashMap<usize, Vec<usize>> = cfg
        .blocks
        .iter()
        .map(|block| (block.id, Vec::new()))
        .collect();
    for edge in &cfg.edges {
        incoming.entry(edge.to).or_default().push(edge.from);
    }
    incoming
}
/// Builds the successor map of a CFG: block id -> ids of blocks it has an edge
/// to, in edge order.
///
/// Every block in `cfg.blocks` gets an entry (possibly empty). Edge origins
/// that are not listed as blocks still get an entry.
pub fn build_successors(cfg: &CfgInfo) -> HashMap<usize, Vec<usize>> {
    let mut outgoing: HashMap<usize, Vec<usize>> = cfg
        .blocks
        .iter()
        .map(|block| (block.id, Vec::new()))
        .collect();
    for edge in &cfg.edges {
        outgoing.entry(edge.from).or_default().push(edge.to);
    }
    outgoing
}
pub fn build_line_to_block(cfg: &CfgInfo) -> HashMap<u32, usize> {
let mut mapping: HashMap<u32, usize> = HashMap::new();
let mut all_lines: HashSet<u32> = HashSet::new();
for block in &cfg.blocks {
for line in block.lines.0..=block.lines.1 {
all_lines.insert(line);
}
}
for line in all_lines {
let mut best_block: Option<(usize, u32)> = None;
for block in &cfg.blocks {
let (start, end) = block.lines;
if line >= start && line <= end {
let size = end - start + 1;
if best_block.is_none()
|| size > best_block.unwrap().1
|| (size == best_block.unwrap().1 && block.id > best_block.unwrap().0)
{
best_block = Some((block.id, size));
}
}
}
if let Some((block_id, _)) = best_block {
mapping.insert(line, block_id);
}
}
mapping
}
/// Groups variable references by the block their line belongs to.
///
/// References whose line has no entry in `line_to_block` are dropped. Within
/// each block the references are ordered by line; references on the same line
/// keep their relative order from `refs` (stable sort).
pub fn build_refs_by_block<'a>(
    refs: &'a [VarRef],
    line_to_block: &HashMap<u32, usize>,
) -> HashMap<usize, Vec<&'a VarRef>> {
    let mut grouped: HashMap<usize, Vec<&'a VarRef>> = HashMap::new();

    let located = refs.iter().filter_map(|var_ref| {
        line_to_block
            .get(&var_ref.line)
            .map(|&block_id| (block_id, var_ref))
    });
    for (block_id, var_ref) in located {
        grouped.entry(block_id).or_default().push(var_ref);
    }

    grouped
        .values_mut()
        .for_each(|bucket| bucket.sort_by_key(|var_ref| var_ref.line));
    grouped
}
/// Checks that a CFG is structurally usable.
///
/// # Errors
///
/// Returns `TldrError::InvalidArgs` (with `arg == "cfg"`) when:
/// - the CFG has no blocks (`"Empty CFG"`, no suggestion);
/// - `cfg.entry_block` is not the id of any block;
/// - an edge's `from` or `to` is not the id of any block (the first offending
///   edge in `cfg.edges` is reported, `from` checked before `to`).
///
/// For the last two cases the suggestion lists the valid block ids in
/// ascending order, so the message is the same on every run.
pub fn validate_cfg(cfg: &CfgInfo) -> Result<(), TldrError> {
    if cfg.blocks.is_empty() {
        return Err(TldrError::InvalidArgs {
            arg: "cfg".to_string(),
            message: "Empty CFG".to_string(),
            suggestion: None,
        });
    }

    let block_ids: HashSet<usize> = cfg.blocks.iter().map(|b| b.id).collect();

    // Shared error constructor. Ids are sorted because HashSet iteration order
    // is unspecified and would otherwise leak into the user-facing text.
    let invalid = |message: String| -> TldrError {
        let mut known: Vec<usize> = block_ids.iter().copied().collect();
        known.sort_unstable();
        TldrError::InvalidArgs {
            arg: "cfg".to_string(),
            message,
            suggestion: Some(format!("Valid block IDs are: {:?}", known)),
        }
    };

    if !block_ids.contains(&cfg.entry_block) {
        return Err(invalid(format!(
            "Entry block {} not in blocks",
            cfg.entry_block
        )));
    }

    for edge in &cfg.edges {
        if !block_ids.contains(&edge.from) {
            return Err(invalid(format!(
                "Edge references invalid source block: {} -> {}",
                edge.from, edge.to
            )));
        }
        if !block_ids.contains(&edge.to) {
            return Err(invalid(format!(
                "Edge references invalid target block: {} -> {}",
                edge.from, edge.to
            )));
        }
    }

    Ok(())
}
/// Regex-based taint patterns for one language.
///
/// Each entry pairs a compiled regex, matched against statement text, with
/// the category to report when it matches.
#[derive(Clone)]
pub struct LanguagePatterns {
/// Patterns that mark a statement as a taint source.
pub sources: Vec<(Regex, TaintSourceType)>,
/// Patterns that mark a statement as a taint sink.
pub sinks: Vec<(Regex, TaintSinkType)>,
/// Patterns that mark a statement as a sanitizer call.
pub sanitizers: Vec<(Regex, SanitizerType)>,
}
// Regex pattern tables for Python. All three lists are empty, so the
// regex-based detectors in this file report nothing for this language.
lazy_static! {
static ref PYTHON_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for TypeScript (also returned for JavaScript by
// `get_patterns`). All three lists are empty.
lazy_static! {
static ref TYPESCRIPT_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Go. All three lists are empty.
lazy_static! {
static ref GO_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Java. All three lists are empty.
lazy_static! {
static ref JAVA_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Rust. All three lists are empty.
lazy_static! {
static ref RUST_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for C. All three lists are empty.
lazy_static! {
static ref C_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for C++. All three lists are empty.
lazy_static! {
static ref CPP_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Ruby. The only entry treats the word `gets`
// (matched on word boundaries) as a user-input source; there are no sinks or
// sanitizers.
lazy_static! {
static ref RUBY_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![
(Regex::new(r"\bgets\b").unwrap(), TaintSourceType::UserInput),
],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Kotlin. All three lists are empty.
lazy_static! {
static ref KOTLIN_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Swift. All three lists are empty.
lazy_static! {
static ref SWIFT_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for C#. All three lists are empty.
lazy_static! {
static ref CSHARP_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Scala. All three lists are empty.
lazy_static! {
static ref SCALA_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for PHP. All three lists are empty.
lazy_static! {
static ref PHP_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Lua (also returned for Luau by `get_patterns`).
// All three lists are empty.
lazy_static! {
static ref LUA_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for Elixir. All three lists are empty.
lazy_static! {
static ref ELIXIR_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
// Regex pattern tables for OCaml. All three lists are empty.
lazy_static! {
static ref OCAML_PATTERNS: LanguagePatterns = LanguagePatterns {
sources: vec![],
sinks: vec![],
sanitizers: vec![],
};
}
/// Returns the regex pattern tables registered for `language`.
///
/// JavaScript shares the TypeScript tables and Luau shares the Lua tables.
pub fn get_patterns(language: Language) -> &'static LanguagePatterns {
    let table: &'static LanguagePatterns = match language {
        Language::Python => &PYTHON_PATTERNS,
        Language::TypeScript | Language::JavaScript => &TYPESCRIPT_PATTERNS,
        Language::Go => &GO_PATTERNS,
        Language::Java => &JAVA_PATTERNS,
        Language::Rust => &RUST_PATTERNS,
        Language::C => &C_PATTERNS,
        Language::Cpp => &CPP_PATTERNS,
        Language::Ruby => &RUBY_PATTERNS,
        Language::Kotlin => &KOTLIN_PATTERNS,
        Language::Swift => &SWIFT_PATTERNS,
        Language::CSharp => &CSHARP_PATTERNS,
        Language::Scala => &SCALA_PATTERNS,
        Language::Php => &PHP_PATTERNS,
        Language::Lua | Language::Luau => &LUA_PATTERNS,
        Language::Elixir => &ELIXIR_PATTERNS,
        Language::Ocaml => &OCAML_PATTERNS,
    };
    table
}
/// Finds taint sources in one statement using the regex tables of `language`.
///
/// For every source pattern that matches, the tainted variable is looked up
/// with three strategies, in order, and the first hit is used:
/// 1. the assignment target (`extract_assigned_var`);
/// 2. the first usable call argument after the match (`extract_call_arg`);
/// 3. statement-level heuristics (`extract_source_var_from_statement`).
///
/// A matching pattern with no recoverable variable contributes nothing.
/// Returns an empty list while AST-only test mode is active on this thread.
pub fn detect_sources(statement: &str, line: u32, language: Language) -> Vec<TaintSource> {
    if AST_ONLY_TEST_MODE.with(|mode| mode.get()) {
        return Vec::new();
    }

    get_patterns(language)
        .sources
        .iter()
        .filter(|(pattern, _)| pattern.is_match(statement))
        .filter_map(|(pattern, source_type)| {
            let var = extract_assigned_var(statement)
                .or_else(|| extract_call_arg(statement, pattern))
                .or_else(|| extract_source_var_from_statement(statement))?;
            Some(TaintSource {
                var,
                line,
                source_type: *source_type,
                statement: Some(statement.to_string()),
            })
        })
        .collect()
}
/// Last-resort heuristics for naming the variable that receives source data.
///
/// Tried in order; the first that yields a valid identifier wins:
/// 1. the token following the first `>>` (e.g. `cin >> name;`);
/// 2. the identifier directly after the first `&` (e.g. `scanf("%d", &n)`);
/// 3. the first whitespace-separated token after the leading one that is an
///    identifier longer than one character, once any `(...` suffix and
///    trailing punctuation are removed (e.g. `read -r line`).
fn extract_source_var_from_statement(statement: &str) -> Option<String> {
    fn is_separator(c: char) -> bool {
        !c.is_alphanumeric() && c != '_'
    }

    if let Some((_, tail)) = statement.split_once(">>") {
        let candidate = tail
            .split_whitespace()
            .next()
            .unwrap_or("")
            .trim_end_matches(is_separator);
        if is_valid_identifier(candidate) {
            return Some(candidate.to_string());
        }
    }

    if let Some((_, tail)) = statement.split_once('&') {
        let candidate = tail.split(is_separator).next().unwrap_or("");
        if is_valid_identifier(candidate) {
            return Some(candidate.to_string());
        }
    }

    statement
        .split_whitespace()
        .skip(1)
        .map(|token| {
            token
                .split('(')
                .next()
                .unwrap_or(token)
                .trim_end_matches(is_separator)
        })
        .find(|name| is_valid_identifier(name) && name.len() > 1)
        .map(str::to_string)
}
/// Finds taint sinks in one statement using the regex tables of `language`.
///
/// For every sink pattern that matches, the variable reaching the sink is
/// looked up in this order:
/// 1. the first usable call argument after the match (`extract_call_arg`);
/// 2. statement-level heuristics (`extract_sink_var_from_statement`);
/// 3. otherwise every variable found in string interpolation / formatting
///    (`extract_interpolated_vars`), producing one sink per variable.
///
/// All sinks are created with `tainted: false`. Returns an empty list while
/// AST-only test mode is active on this thread.
pub fn detect_sinks(statement: &str, line: u32, language: Language) -> Vec<TaintSink> {
    if AST_ONLY_TEST_MODE.with(|mode| mode.get()) {
        return Vec::new();
    }

    let mut found = Vec::new();
    let matching = get_patterns(language)
        .sinks
        .iter()
        .filter(|(pattern, _)| pattern.is_match(statement));

    for (pattern, sink_type) in matching {
        let single = extract_call_arg(statement, pattern)
            .or_else(|| extract_sink_var_from_statement(statement, pattern));
        let vars = match single {
            Some(var) => vec![var],
            None => extract_interpolated_vars(statement),
        };
        found.extend(vars.into_iter().map(|var| TaintSink {
            var,
            line,
            sink_type: *sink_type,
            tainted: false,
            statement: Some(statement.to_string()),
        }));
    }
    found
}
/// Heuristically names the variable that flows into a sink matched by `pattern`.
///
/// Returns `None` when `pattern` does not match `statement` or no heuristic
/// yields a valid identifier. The heuristics are tried in this order:
/// 1. property-assignment form — the text after the match does not start with
///    `(`: take the leading identifier of the right-hand side of the last
///    plain `=` in the statement;
/// 2. call form — first argument after the first `(` at or after the match,
///    unless it is a quoted literal; only the part before `.` is used and a
///    leading `$` is stripped;
/// 3. command form — the first token following the match (up to whitespace
///    or `;`);
/// 4. multi-statement line — the first identifier found in any `;`-separated
///    part that does not itself match `pattern`.
fn extract_sink_var_from_statement(statement: &str, pattern: &Regex) -> Option<String> {
if let Some(m) = pattern.find(statement) {
let after = &statement[m.end()..];
let after = after.trim();
// Heuristic 1: `target.sink = value` style (no call parenthesis after the match).
if after.is_empty() || !after.starts_with('(') {
if let Some(eq_pos) = statement.rfind('=') {
// Neighbouring bytes decide whether this `=` is a plain assignment.
let before_eq = if eq_pos > 0 {
statement.as_bytes()[eq_pos - 1]
} else {
b' '
};
let after_eq = if eq_pos + 1 < statement.len() {
statement.as_bytes()[eq_pos + 1]
} else {
b' '
};
// Reject `==`, `!=`, `<=`, `>=`.
if before_eq != b'='
&& before_eq != b'!'
&& before_eq != b'<'
&& before_eq != b'>'
&& after_eq != b'='
{
let rhs = statement[eq_pos + 1..].trim();
let var = rhs
.split(|c: char| !c.is_alphanumeric() && c != '_')
.next()
.unwrap_or("");
if is_valid_identifier(var) {
return Some(var.to_string());
}
}
}
}
// Heuristic 2: first argument of the first `(` at or after the match start.
let search_area = &statement[m.start()..];
if let Some(open) = search_area.find('(') {
let rest = &search_area[open + 1..];
let end = rest.find([',', ')']).unwrap_or(rest.len());
let arg = rest[..end].trim();
// Quoted literals are not variables.
if !arg.starts_with('"') && !arg.starts_with('\'') && !arg.is_empty() {
// `obj.field` -> `obj`; `$var` -> `var`.
let var_name = arg.split('.').next().unwrap_or(arg);
let var_name = var_name.trim_start_matches('$');
if is_valid_identifier(var_name) {
return Some(var_name.to_string());
}
}
}
// Heuristic 3: `sink value` style — the token right after the match.
if !after.is_empty() && !after.starts_with('(') {
let token = after
.split(|c: char| c.is_whitespace() || c == ';')
.next()
.unwrap_or("");
let token = token.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_');
if is_valid_identifier(token) {
return Some(token.to_string());
}
}
// Heuristic 4: several statements on one line — look at the parts that
// are not the sink itself.
if statement.contains(';') {
for part in statement.split(';') {
let part = part.trim();
if pattern.is_match(part) {
continue;
}
let var = part
.split(|c: char| !c.is_alphanumeric() && c != '_')
.find(|t| is_valid_identifier(t));
if let Some(var) = var {
return Some(var.to_string());
}
}
}
}
None
}
/// Returns the category of the first sanitizer pattern of `language` that
/// matches `statement`, or `None` when nothing matches.
///
/// Always returns `None` while AST-only test mode is active on this thread.
pub fn detect_sanitizer(statement: &str, language: Language) -> Option<SanitizerType> {
    if AST_ONLY_TEST_MODE.with(|mode| mode.get()) {
        return None;
    }

    get_patterns(language)
        .sanitizers
        .iter()
        .find(|(pattern, _)| pattern.is_match(statement))
        .map(|(_, sanitizer_type)| *sanitizer_type)
}
/// Returns `true` when `detect_sanitizer` recognises a sanitizer in `statement`.
pub fn is_sanitizer(statement: &str, language: Language) -> bool {
    let detected = detect_sanitizer(statement, language);
    detected.is_some()
}
/// Lists `(assigned variable, sanitizer category)` pairs for a statement.
///
/// One pair is produced per sanitizer pattern of `language` that matches,
/// provided the statement assigns to a variable (`extract_assigned_var`);
/// statements without an assignment target yield nothing. `_line` is unused.
///
/// NOTE(review): unlike `detect_sanitizer`, this function does not consult the
/// AST-only test mode flag — confirm whether that is intentional.
pub fn find_sanitizers_in_statement(
    statement: &str,
    _line: u32,
    language: Language,
) -> Vec<(String, SanitizerType)> {
    get_patterns(language)
        .sanitizers
        .iter()
        .filter(|(pattern, _)| pattern.is_match(statement))
        .filter_map(|(_, sanitizer_type)| {
            extract_assigned_var(statement).map(|var| (var, *sanitizer_type))
        })
        .collect()
}
/// Extracts the name of the variable a statement assigns to, if any.
///
/// Recognised shapes (checked in this order):
/// - `name := expr` and `a, b := expr` (first name is returned);
/// - `name = expr`, `name: Type = expr`;
/// - declarations such as `const name = ...` / `char *name = ...` (last token
///   before `=`, with leading `*` / `&` removed; a `$` sigil is kept);
/// - destructuring such as `const { a, b } = ...` (last identifier, ignoring
///   `ok` and `err`);
/// - `$name = ...` (returned with the `$`).
///
/// Returns `None` when the first `=` belongs to a comparison (`==`, `!=`,
/// `<=`, `>=`) or no identifier can be recovered.
///
/// The characters around `=` are inspected as bytes at the byte offset
/// returned by `find`; this keeps the comparison check correct for statements
/// containing non-ASCII text.
fn extract_assigned_var(statement: &str) -> Option<String> {
    let trimmed = statement.trim();

    // Short variable declaration (`:=`).
    if let Some(pos) = trimmed.find(":=") {
        let var = trimmed[..pos].trim().trim_start_matches('(').trim();
        if is_valid_identifier(var) {
            return Some(var.to_string());
        }
        if let Some(first) = var.split(',').next() {
            let first = first.trim();
            if is_valid_identifier(first) {
                return Some(first.to_string());
            }
        }
        // Otherwise fall through to the generic `=` handling below.
    }

    let pos = trimmed.find('=')?;

    // `=` is ASCII, so `pos` is a char boundary and the neighbouring bytes can
    // be compared against ASCII operators directly (a byte belonging to a
    // multi-byte character never equals an ASCII byte).
    let bytes = trimmed.as_bytes();
    let prev = pos.checked_sub(1).map(|i| bytes[i]);
    let next = bytes.get(pos + 1).copied();
    let is_comparison = next == Some(b'=')
        || matches!(prev, Some(b'=') | Some(b'!') | Some(b'<') | Some(b'>'));
    if is_comparison {
        return None;
    }

    // Drop a type annotation (`name: Type`) if present.
    let before = &trimmed[..pos];
    let var_part = match before.find(':') {
        Some(colon_pos) => &before[..colon_pos],
        None => before,
    };
    let var = var_part.trim();
    if is_valid_identifier(var) {
        return Some(var.to_string());
    }

    // `keyword name` / `Type *name`: take the last token.
    let tokens: Vec<&str> = var.split_whitespace().collect();
    if tokens.len() >= 2 {
        let last = tokens[tokens.len() - 1];
        let clean = last.trim_start_matches('*').trim_start_matches('&');
        let check = clean.trim_start_matches('$');
        if !check.is_empty() && is_valid_identifier(check) {
            return Some(clean.to_string());
        }
    }

    // Destructuring / tuple patterns: last identifier inside the brackets.
    if var.contains('{') || var.contains('(') || var.contains('[') {
        let cleaned = var.replace(['{', '}', '(', ')', '[', ']', ':'], " ");
        let last_ident = cleaned
            .split_whitespace()
            .filter(|t| is_valid_identifier(t) && *t != "ok" && *t != "err")
            .last();
        if let Some(last_ident) = last_ident {
            return Some(last_ident.to_string());
        }
    }

    // Sigil-prefixed variable (`$name`).
    if let Some(name) = var.strip_prefix('$') {
        if is_valid_identifier(name) {
            return Some(var.to_string());
        }
    }

    None
}
/// Returns the first variable-like argument that follows the `pattern` match.
///
/// The text after the match (with one leading `(` removed, if present) is
/// scanned as a comma-separated list that ends at the first `)`. For each
/// argument:
/// - if it is not a string literal (`"…"`, `'…'`, `f"…"`, `f'…'`, `r"…"`,
///   `r'…'`), the part before the first `.` is returned when it is a valid
///   identifier (a leading `$` is allowed and kept in the result);
/// - otherwise, if it contains `+`, each `+`-separated operand is checked the
///   same way (here `r`-prefixed literals are not skipped).
///
/// Splitting is purely textual: commas or parentheses inside nested calls or
/// string literals are not treated specially. Returns `None` when `pattern`
/// does not match or no argument qualifies.
fn extract_call_arg(statement: &str, pattern: &Regex) -> Option<String> {
if let Some(m) = pattern.find(statement) {
let after_match = &statement[m.end()..];
let rest = after_match.strip_prefix('(').unwrap_or(after_match);
let mut remaining = rest;
loop {
// Current argument ends at the next `,` or `)` (or end of text).
let end = remaining.find([',', ')']).unwrap_or(remaining.len());
let arg = remaining[..end].trim();
if !arg.is_empty()
&& !arg.starts_with('"')
&& !arg.starts_with('\'')
&& !arg.starts_with("f\"")
&& !arg.starts_with("f'")
&& !arg.starts_with("r\"")
&& !arg.starts_with("r'")
{
// `obj.attr` -> `obj`; validity is checked without a `$` sigil.
let var_name = arg.split('.').next().unwrap_or(arg);
let check_name = var_name.trim_start_matches('$');
if is_valid_identifier(check_name) {
return Some(var_name.to_string());
}
}
// Concatenation such as `"SELECT " + user`: inspect each operand.
if arg.contains('+') {
for part in arg.split('+') {
let part = part.trim();
if !part.is_empty()
&& !part.starts_with('"')
&& !part.starts_with('\'')
&& !part.starts_with("f\"")
&& !part.starts_with("f'")
{
let var_name = part.split('.').next().unwrap_or(part);
let check_name = var_name.trim_start_matches('$');
if is_valid_identifier(check_name) {
return Some(var_name.to_string());
}
}
}
}
// Stop at end of text or at the closing parenthesis.
if end >= remaining.len() {
break;
}
let next_char = remaining.as_bytes()[end];
if next_char == b')' {
break;
}
// Skip the comma and continue with the next argument.
remaining = &remaining[end + 1..];
}
}
None
}
fn extract_interpolated_vars(statement: &str) -> Vec<String> {
let mut vars = Vec::new();
let _chars = statement.chars().peekable();
let mut i = 0;
let bytes = statement.as_bytes();
while i < bytes.len() {
let is_interp = match bytes[i] {
b'{' => {
i + 1 < bytes.len() && bytes[i + 1] != b'{'
}
b'$' | b'#' => {
i + 1 < bytes.len() && bytes[i + 1] == b'{'
}
_ => false,
};
if is_interp {
let brace_start = if bytes[i] == b'{' { i } else { i + 1 };
if brace_start + 1 < bytes.len() {
if let Some(close) = statement[brace_start + 1..].find('}') {
let inner = &statement[brace_start + 1..brace_start + 1 + close];
let inner = inner.trim();
let var_name = inner
.split(|c: char| !c.is_alphanumeric() && c != '_')
.next()
.unwrap_or("");
if is_valid_identifier(var_name) {
vars.push(var_name.to_string());
}
i = brace_start + 1 + close + 1;
continue;
}
}
}
if i + 8 < bytes.len() && &statement[i..i + 8] == ".format(" {
let args_start = i + 8;
if let Some(close) = statement[args_start..].find(')') {
let args_str = &statement[args_start..args_start + close];
for arg in args_str.split(',') {
let arg = arg.trim();
let val = if let Some(eq_pos) = arg.find('=') {
arg[eq_pos + 1..].trim()
} else {
arg
};
let var_name = val
.split(|c: char| !c.is_alphanumeric() && c != '_')
.next()
.unwrap_or("");
if is_valid_identifier(var_name) {
vars.push(var_name.to_string());
}
}
i = args_start + close + 1;
continue;
}
}
if bytes[i] == b'%' && i > 0 {
let before = statement[..i].trim_end();
let after = statement[i + 1..].trim_start();
if (before.ends_with('"') || before.ends_with('\'')) && !after.starts_with('%') {
let args_str = if after.starts_with('(') {
if let Some(close) = after.find(')') {
&after[1..close]
} else {
""
}
} else {
after
.split(|c: char| c.is_whitespace() || c == ')' || c == ',')
.next()
.unwrap_or("")
};
for arg in args_str.split(',') {
let arg = arg.trim();
let var_name = arg
.split(|c: char| !c.is_alphanumeric() && c != '_')
.next()
.unwrap_or("");
if is_valid_identifier(var_name) {
vars.push(var_name.to_string());
}
}
}
}
i += 1;
}
vars.sort();
vars.dedup();
vars
}
/// Returns `true` when `s` is a non-empty identifier: the first character is
/// alphabetic or `_`, and every character is alphanumeric or `_`
/// (Unicode-aware, per `char::is_alphabetic` / `char::is_alphanumeric`).
fn is_valid_identifier(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        Some(first) if first.is_alphabetic() || first == '_' => {
            chars.all(|c| c.is_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
/// Returns `true` when `ident` occurs in `text` as a whole identifier, i.e.
/// an occurrence that is neither preceded nor followed by an identifier
/// character (alphanumeric or `_`).
///
/// An empty `ident` never matches. Boundary characters are classified with
/// the same Unicode-aware rule as `is_valid_identifier`, and the scan always
/// advances by whole characters, so non-ASCII text and identifiers are
/// handled without slicing inside a character.
fn identifier_in_text(text: &str, ident: &str) -> bool {
    if ident.is_empty() || ident.len() > text.len() {
        return false;
    }
    let is_ident_char = |c: char| c.is_alphanumeric() || c == '_';

    let mut pos = 0;
    while let Some(offset) = text[pos..].find(ident) {
        let start = pos + offset;
        let end = start + ident.len();

        let before_ok = text[..start]
            .chars()
            .next_back()
            .map_or(true, |c| !is_ident_char(c));
        let after_ok = text[end..]
            .chars()
            .next()
            .map_or(true, |c| !is_ident_char(c));
        if before_ok && after_ok {
            return true;
        }

        // Step past the first character of this occurrence (not a single
        // byte) so the next slice starts on a char boundary while still
        // allowing overlapping occurrences.
        let step = text[start..].chars().next().map_or(1, char::len_utf8);
        pos = start + step;
    }
    false
}
#[allow(dead_code)]
pub fn is_constant_string(statement: &str) -> bool {
lazy_static! {
static ref CONST_STRING: Regex = Regex::new(r#"^\s*\w+\s*=\s*["'][^"']*["']\s*$"#).unwrap();
}
CONST_STRING.is_match(statement)
}
#[allow(dead_code)]
pub fn is_orm_safe_pattern(statement: &str) -> bool {
lazy_static! {
static ref ORM_SAFE: Regex =
Regex::new(r"(\.filter\s*\(|\.where\s*\(|\.filter_by\s*\()").unwrap();
}
ORM_SAFE.is_match(statement)
}
use super::ast_utils::{
call_node_kinds, extract_call_name, extract_member_access_receiver_and_field,
find_parent_assignment_var, is_in_comment, is_in_string, node_text, string_node_kinds,
walk_descendants,
};
/// One AST-level taint-source rule.
///
/// NOTE(review): the matcher that interprets these fields is outside this
/// chunk. From the tables, a receiver of `"*"` presumably means "any receiver"
/// and an empty receiver presumably marks a raw-text pattern — confirm.
struct AstSourcePattern {
/// Bare function names to match.
call_names: &'static [&'static str],
/// `(receiver, member)` pairs to match.
member_patterns: &'static [(&'static str, &'static str)],
/// Category reported when the rule matches.
source_type: TaintSourceType,
}
/// One AST-level taint-sink rule.
///
/// NOTE(review): the matcher that interprets these fields is outside this
/// chunk. From the tables, a receiver of `"*"` presumably means "any receiver"
/// and an empty receiver presumably marks a raw-text pattern — confirm.
struct AstSinkPattern {
/// Bare function names to match.
call_names: &'static [&'static str],
/// `(receiver, member)` pairs to match.
member_patterns: &'static [(&'static str, &'static str)],
/// Category reported when the rule matches.
sink_type: TaintSinkType,
}
/// One AST-level sanitizer rule.
///
/// NOTE(review): the matcher that interprets these fields is outside this
/// chunk. From the tables, a receiver of `"*"` presumably means "any receiver"
/// — confirm.
struct AstSanitizerPattern {
/// Bare function names to match.
call_names: &'static [&'static str],
/// `(receiver, member)` pairs to match.
member_patterns: &'static [(&'static str, &'static str)],
/// Category reported when the rule matches.
sanitizer_type: SanitizerType,
}
/// The AST-level rule tables for one language.
struct AstLanguagePatterns {
/// Taint-source rules.
sources: &'static [AstSourcePattern],
/// Taint-sink rules.
sinks: &'static [AstSinkPattern],
/// Sanitizer rules.
sanitizers: &'static [AstSanitizerPattern],
}
/// AST-level taint-source rules for Python.
static PYTHON_AST_SOURCES: &[AstSourcePattern] = &[
// Built-in interactive input.
AstSourcePattern {
call_names: &["input"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
// Request attributes carrying query/form/cookie/header data.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("request", "args"),
("request", "form"),
("request", "values"),
("request", "cookies"),
("request", "headers"),
],
source_type: TaintSourceType::HttpParam,
},
// NOTE(review): `request.json` / `request.data` are filed as HttpParam while
// `request.get_json` below is HttpBody — confirm this split is intended.
AstSourcePattern {
call_names: &[],
member_patterns: &[("request", "json"), ("request", "data")],
source_type: TaintSourceType::HttpParam,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("request", "get_json")],
source_type: TaintSourceType::HttpBody,
},
// Standard input.
AstSourcePattern {
call_names: &[],
member_patterns: &[("sys", "stdin")],
source_type: TaintSourceType::Stdin,
},
// Environment access.
AstSourcePattern {
call_names: &[],
member_patterns: &[("os", "environ"), ("os", "getenv")],
source_type: TaintSourceType::EnvVar,
},
// Read methods on a `*` receiver.
AstSourcePattern {
call_names: &[],
member_patterns: &[("*", "read"), ("*", "readlines"), ("*", "readline")],
source_type: TaintSourceType::FileRead,
},
];
/// AST-level taint-sink rules for Python.
static PYTHON_AST_SINKS: &[AstSinkPattern] = &[
// SQL execution methods on a `*` receiver.
AstSinkPattern {
call_names: &[],
member_patterns: &[("*", "execute"), ("*", "executemany")],
sink_type: TaintSinkType::SqlQuery,
},
// Dynamic code: eval / exec / compile each get their own category.
AstSinkPattern {
call_names: &["eval"],
member_patterns: &[],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &["exec"],
member_patterns: &[],
sink_type: TaintSinkType::CodeExec,
},
AstSinkPattern {
call_names: &["compile"],
member_patterns: &[],
sink_type: TaintSinkType::CodeCompile,
},
// Process spawning via `subprocess`.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("subprocess", "run"),
("subprocess", "call"),
("subprocess", "Popen"),
("subprocess", "check_output"),
],
sink_type: TaintSinkType::ShellExec,
},
// Process spawning via `os.system` / `os.popen`.
AstSinkPattern {
call_names: &[],
member_patterns: &[("os", "system"), ("os", "popen")],
sink_type: TaintSinkType::ShellExec,
},
// Process spawning via the `os.spawn*` family.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("os", "spawnl"),
("os", "spawnle"),
("os", "spawnlp"),
("os", "spawnlpe"),
("os", "spawnv"),
("os", "spawnve"),
("os", "spawnvp"),
("os", "spawnvpe"),
],
sink_type: TaintSinkType::ShellExec,
},
// `write` on a `*` receiver.
AstSinkPattern {
call_names: &[],
member_patterns: &[("*", "write")],
sink_type: TaintSinkType::FileWrite,
},
// HTML output; `("", "|safe")` has an empty receiver (template filter text).
// NOTE(review): `("response", "write")` here and `("*", "write")` above can
// both apply to the same call; which rule wins depends on the matcher.
AstSinkPattern {
call_names: &["Markup", "mark_safe"],
member_patterns: &[
("", "|safe"),
("response", "write"),
("Response", "set_data"),
],
sink_type: TaintSinkType::HtmlOutput,
},
// Filesystem path construction / opening.
AstSinkPattern {
call_names: &["open", "Path"],
member_patterns: &[
("os.path", "join"),
("shutil", "copy"),
("shutil", "move"),
],
sink_type: TaintSinkType::FileOpen,
},
// Outgoing HTTP requests.
AstSinkPattern {
call_names: &["urlopen"],
member_patterns: &[
("requests", "get"),
("requests", "post"),
("requests", "put"),
("requests", "delete"),
("requests", "head"),
("requests", "patch"),
("requests", "request"),
("urllib.request", "urlopen"),
("httpx", "get"),
("httpx", "post"),
("httpx", "request"),
("aiohttp", "ClientSession"),
],
sink_type: TaintSinkType::HttpRequest,
},
// Deserialization entry points.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("pickle", "load"),
("pickle", "loads"),
("yaml", "load"),
("yaml", "unsafe_load"),
],
sink_type: TaintSinkType::Deserialize,
},
];
/// AST-level sanitizer rules for Python.
static PYTHON_AST_SANITIZERS: &[AstSanitizerPattern] = &[
// Numeric conversions.
AstSanitizerPattern {
call_names: &["int", "float", "bool"],
member_patterns: &[],
sanitizer_type: SanitizerType::Numeric,
},
// Shell quoting.
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("shlex", "quote"), ("pipes", "quote")],
sanitizer_type: SanitizerType::Shell,
},
// HTML escaping.
AstSanitizerPattern {
call_names: &[],
member_patterns: &[
("html", "escape"),
("markupsafe", "escape"),
("cgi", "escape"),
],
sanitizer_type: SanitizerType::Html,
},
];
/// AST-level taint-source rules for TypeScript.
/// NOTE(review): presumably also used for JavaScript, as with the regex
/// tables — the lookup code is not visible here.
static TYPESCRIPT_AST_SOURCES: &[AstSourcePattern] = &[
// `req.*` request object: body.
AstSourcePattern {
call_names: &[],
member_patterns: &[("req", "body")],
source_type: TaintSourceType::HttpBody,
},
// `req.*` request object: params, query, cookies, headers.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("req", "params"),
("req", "query"),
("req", "cookies"),
("req", "headers"),
],
source_type: TaintSourceType::HttpParam,
},
// Process environment.
AstSourcePattern {
call_names: &[],
member_patterns: &[("process", "env")],
source_type: TaintSourceType::EnvVar,
},
// Standard input stream.
AstSourcePattern {
call_names: &[],
member_patterns: &[("process", "stdin")],
source_type: TaintSourceType::Stdin,
},
// Bare `readline` call.
AstSourcePattern {
call_names: &["readline"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
// Read methods on a `*` receiver.
AstSourcePattern {
call_names: &[],
member_patterns: &[("*", "read"), ("*", "readFile")],
source_type: TaintSourceType::FileRead,
},
// `request.*` request object: body accessors.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("request", "json"),
("request", "text"),
("request", "formData"),
("request", "body"),
("request", "raw"),
],
source_type: TaintSourceType::HttpBody,
},
// `request.*` and `searchParams.*`: header/cookie/param/query accessors.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("request", "headers"),
("request", "cookies"),
("request", "params"),
("request", "query"),
("searchParams", "get"),
("searchParams", "getAll"),
("searchParams", "has"),
],
source_type: TaintSourceType::HttpParam,
},
// Empty-receiver entries holding longer text fragments.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("", "request.nextUrl.searchParams"),
("", "headers().get("),
("", "cookies().get("),
],
source_type: TaintSourceType::HttpParam,
},
// Parameter-decorator text fragments: body-like inputs.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("", "@Body("),
("", "@Req("),
("", "@Request("),
("", "@UploadedFile("),
("", "@UploadedFiles("),
],
source_type: TaintSourceType::HttpBody,
},
// Parameter-decorator text fragments: query/param/header/cookie inputs.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("", "@Query("),
("", "@Param("),
("", "@Headers("),
("", "@Cookies("),
],
source_type: TaintSourceType::HttpParam,
},
];
/// AST-level taint-sink rules for TypeScript.
/// NOTE(review): presumably also used for JavaScript, as with the regex
/// tables — the lookup code is not visible here.
static TYPESCRIPT_AST_SINKS: &[AstSinkPattern] = &[
// Dynamic code evaluation.
AstSinkPattern {
call_names: &["eval"],
member_patterns: &[],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("", "new Function")],
sink_type: TaintSinkType::CodeEval,
},
// Process spawning via `child_process`.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("child_process", "exec"),
("child_process", "spawn"),
("child_process", "execSync"),
("child_process", "execFile"),
],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &["execSync"],
member_patterns: &[],
sink_type: TaintSinkType::ShellExec,
},
// NOTE(review): `innerHTML` and `document.write` are filed as FileWrite,
// while `outerHTML` / `document.writeln` further down are HtmlOutput —
// confirm whether these should be HtmlOutput as well.
AstSinkPattern {
call_names: &[],
member_patterns: &[("*", "innerHTML")],
sink_type: TaintSinkType::FileWrite,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("document", "write")],
sink_type: TaintSinkType::FileWrite,
},
// SQL execution methods on a `*` receiver.
AstSinkPattern {
call_names: &[],
member_patterns: &[("*", "query"), ("*", "execute")],
sink_type: TaintSinkType::SqlQuery,
},
// Redirect helpers.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("NextResponse", "redirect"),
("Response", "redirect"),
],
sink_type: TaintSinkType::OpenRedirect,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("", "redirect")],
sink_type: TaintSinkType::OpenRedirect,
},
// NOTE(review): filed as FileWrite; see the `innerHTML` note above.
AstSinkPattern {
call_names: &[],
member_patterns: &[("", "dangerouslySetInnerHTML")],
sink_type: TaintSinkType::FileWrite,
},
// `reply.*` response object.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("reply", "redirect"),
],
sink_type: TaintSinkType::OpenRedirect,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("reply", "header"),
],
sink_type: TaintSinkType::FileWrite,
},
// `res.*` / `response.*` redirects.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("res", "redirect"),
("response", "redirect"),
],
sink_type: TaintSinkType::OpenRedirect,
},
// Response body senders.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("reply", "send"),
("res", "send"),
("Response", "send"),
("response", "send"),
],
sink_type: TaintSinkType::HtmlOutput,
},
// NOTE(review): `("NextResponse", "redirect")` also appears above under
// OpenRedirect; which category is reported depends on the matcher's order.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("NextResponse", "json"),
("NextResponse", "redirect"),
],
sink_type: TaintSinkType::HtmlOutput,
},
// DOM / HTML writers.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("*", "outerHTML"),
("document", "writeln"),
("*", "html"),
],
sink_type: TaintSinkType::HtmlOutput,
},
// Filesystem access and path construction.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("fs", "readFile"),
("fs", "writeFile"),
("fs", "readFileSync"),
("fs", "writeFileSync"),
("path", "join"),
],
sink_type: TaintSinkType::FileOpen,
},
// Outgoing HTTP requests.
AstSinkPattern {
call_names: &["fetch", "got", "axios"],
member_patterns: &[
("axios", "get"),
("axios", "post"),
("axios", "put"),
("axios", "delete"),
("axios", "request"),
("http", "get"),
("http", "request"),
("https", "get"),
("https", "request"),
("superagent", "get"),
("", "node-fetch("),
],
sink_type: TaintSinkType::HttpRequest,
},
// Deserialization entry points.
AstSinkPattern {
call_names: &["unserialize"],
member_patterns: &[
("serialize", "unserialize"),
("", "node-serialize"),
],
sink_type: TaintSinkType::Deserialize,
},
];
/// AST-level sanitizer rules for TypeScript.
static TYPESCRIPT_AST_SANITIZERS: &[AstSanitizerPattern] = &[
// Numeric conversions.
AstSanitizerPattern {
call_names: &["parseInt", "Number", "parseFloat"],
member_patterns: &[],
sanitizer_type: SanitizerType::Numeric,
},
// NOTE(review): `parse` / `safeParse` on a `*` receiver are filed as Numeric;
// presumably aimed at schema validators, but this would also cover e.g.
// `JSON.parse` — confirm.
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("*", "parse"), ("*", "safeParse")],
sanitizer_type: SanitizerType::Numeric,
},
// URL-component encoding and DOMPurify are both filed as Html.
AstSanitizerPattern {
call_names: &["encodeURIComponent"],
member_patterns: &[("DOMPurify", "sanitize")],
sanitizer_type: SanitizerType::Html,
},
];
/// AST-level taint-source rules for Go.
static GO_AST_SOURCES: &[AstSourcePattern] = &[
// Console / reader input.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("fmt", "Scan"),
("bufio", "NewReader"),
("bufio", "NewScanner"),
],
source_type: TaintSourceType::UserInput,
},
// Request parameters; these rules name the request variable `r`.
// NOTE(review): `("*", "Query")` also shares the method name with
// `("db", "Query")` in the Go sink table — confirm how the matcher
// separates the two.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("r", "FormValue"),
("r", "PostFormValue"),
("r.URL", "Query"),
("*", "Query"),
],
source_type: TaintSourceType::HttpParam,
},
// Request body; the second entry is an empty-receiver text fragment.
AstSourcePattern {
call_names: &[],
member_patterns: &[("r", "Body"), ("", ".ReadAll(r.Body)")],
source_type: TaintSourceType::HttpBody,
},
// Environment access.
AstSourcePattern {
call_names: &[],
member_patterns: &[("os", "Getenv")],
source_type: TaintSourceType::EnvVar,
},
// Standard input.
AstSourcePattern {
call_names: &[],
member_patterns: &[("os", "Stdin")],
source_type: TaintSourceType::Stdin,
},
// File reads. `os.Open` and `ioutil.ReadFile` are also listed as FileOpen
// sinks in the Go sink table.
AstSourcePattern {
call_names: &[],
member_patterns: &[("os", "Open"), ("ioutil", "ReadFile")],
source_type: TaintSourceType::FileRead,
},
];
/// Taint-sink patterns for Go, grouped by the `TaintSinkType` they report.
///
/// Every entry here uses an explicit `(receiver, field)` pair; no bare call
/// names are listed.
static GO_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[("exec", "Command")],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("db", "Exec"), ("db", "Query"), ("db", "QueryRow")],
sink_type: TaintSinkType::SqlQuery,
},
// NOTE(review): `template.HTML` and `fmt.Fprintf` are reported as
// `FileWrite` rather than `HtmlOutput` — confirm this classification is
// what downstream consumers expect.
AstSinkPattern {
call_names: &[],
member_patterns: &[("template", "HTML"), ("fmt", "Fprintf")],
sink_type: TaintSinkType::FileWrite,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("os", "Open"),
("os", "Create"),
("ioutil", "ReadFile"),
("ioutil", "WriteFile"),
("filepath", "Join"),
],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("http", "Get"),
("http", "Post"),
("http", "PostForm"),
("http", "Head"),
("http", "NewRequest"),
("http", "NewRequestWithContext"),
],
sink_type: TaintSinkType::HttpRequest,
},
];
/// Sanitizer patterns for Go: `strconv` numeric conversions and
/// HTML/URL escaping helpers, each tagged with its `SanitizerType`.
static GO_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &[],
member_patterns: &[
("strconv", "Atoi"),
("strconv", "ParseInt"),
("strconv", "ParseFloat"),
],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("html", "EscapeString"), ("url", "QueryEscape")],
sanitizer_type: SanitizerType::Html,
},
];
/// Taint-source patterns for Java, grouped by `TaintSourceType`.
///
/// Entries with an empty receiver (e.g. `"new Scanner(System.in)"`) are raw
/// substrings checked against the node text by `member_patterns_match`;
/// `"*"` accepts any receiver.
static JAVA_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "new Scanner(System.in)")],
source_type: TaintSourceType::Stdin,
},
AstSourcePattern {
call_names: &["readLine"],
member_patterns: &[("", "new BufferedReader")],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("request", "getParameter"), ("*", "getQueryString")],
source_type: TaintSourceType::HttpParam,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("System", "getenv")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "new FileReader"), ("Files", "readAllLines")],
source_type: TaintSourceType::FileRead,
},
];
/// Taint-sink patterns for Java, grouped by `TaintSinkType`.
///
/// Mixes three pattern forms: exact `(receiver, field)` pairs, wildcard
/// receivers (`"*"`), and empty-receiver raw substrings such as
/// `"ProcessBuilder"` that match anywhere in the node text.
static JAVA_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "Runtime.getRuntime().exec"),
("", "ProcessBuilder"),
],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("*", "execute"),
("*", "executeQuery"),
("*", "executeUpdate"),
],
sink_type: TaintSinkType::SqlQuery,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("Class", "forName")],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("Files", "readString"),
("Files", "writeString"),
("Paths", "get"),
("", "new File("),
("", "new java.io.File("),
],
sink_type: TaintSinkType::FileOpen,
},
// NOTE(review): `("*", "send")` and the bare `"URL("` substring are broad;
// they will match any `.send(...)` call and any text containing `URL(`.
AstSinkPattern {
call_names: &[],
member_patterns: &[
("URI", "create"),
("HttpRequest", "newBuilder"),
("HttpClient", "newHttpClient"),
("*", "openConnection"),
("*", "openStream"),
("*", "send"),
("*", "getForObject"),
("*", "postForObject"),
("", "URL("),
("", "RestTemplate"),
],
sink_type: TaintSinkType::HttpRequest,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("*", "readObject"),
("", "ObjectInputStream"),
("", "XMLDecoder"),
("", "new java.io.ObjectInputStream("),
],
sink_type: TaintSinkType::Deserialize,
},
];
/// Sanitizer patterns for Java: boxed-type numeric parsers and HTML
/// encoders, each tagged with its `SanitizerType`.
static JAVA_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &[],
member_patterns: &[
("Integer", "parseInt"),
("Long", "parseLong"),
("Double", "parseDouble"),
],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &[],
member_patterns: &[
("ESAPI", "encoder"),
("StringEscapeUtils", "escapeHtml"),
],
sanitizer_type: SanitizerType::Html,
},
];
/// Taint-source patterns for Rust, grouped by `TaintSourceType`.
///
/// All entries use an empty receiver, so each second element (a `::` path
/// such as `"env::var"`) is a raw substring that `member_patterns_match`
/// looks for in the node text.
static RUST_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "io::stdin"), ("", "std::io::stdin")],
source_type: TaintSourceType::Stdin,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "env::var"), ("", "std::env::var")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "env::args"), ("", "std::env::args")],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[
("", "fs::read_to_string"),
("", "std::fs::read_to_string"),
("", "File::open"),
],
source_type: TaintSourceType::FileRead,
},
];
/// Taint-sink patterns for Rust, grouped by `TaintSinkType`.
///
/// Path-style entries (`"Command::new"`, `"reqwest::get"`, ...) use an empty
/// receiver and are therefore raw substring matches; the HTTP group also
/// lists exact `(receiver, field)` pairs keyed on conventional variable names.
static RUST_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[("", "Command::new"), ("", "std::process::Command")],
sink_type: TaintSinkType::ShellExec,
},
// NOTE(review): the bare substring `"unsafe"` is reported as `CodeEval`;
// being a substring it also matches identifiers or text that merely contain
// the word — confirm intended.
AstSinkPattern {
call_names: &[],
member_patterns: &[("", "unsafe")],
sink_type: TaintSinkType::CodeEval,
},
// NOTE(review): raw pointer read/write is reported as `FileWrite`,
// presumably for lack of a closer sink category — confirm.
AstSinkPattern {
call_names: &[],
member_patterns: &[("", "std::ptr::write"), ("", "std::ptr::read")],
sink_type: TaintSinkType::FileWrite,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "std::fs::read_to_string"),
("", "std::fs::write"),
("", "File::open"),
("", "PathBuf::from"),
],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("client", "get"),
("client", "post"),
("agent", "get"),
("agent", "post"),
("http", "get"),
("http", "post"),
("request_builder", "get"),
("request_builder", "post"),
("req", "get"),
("req", "post"),
("", "reqwest::get"),
("", "reqwest::Client"),
("", "reqwest::blocking::get"),
("", "reqwest::blocking::Client"),
("", "ureq::get"),
("", "ureq::post"),
("", "hyper::Client"),
("", "Url::parse"),
],
sink_type: TaintSinkType::HttpRequest,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "serde_json::from_str"),
("", "serde_yaml::from_str"),
("", "bincode::deserialize"),
],
sink_type: TaintSinkType::Deserialize,
},
];
/// Sanitizer patterns for Rust: turbofish `.parse::<T>` calls for the listed
/// integer and float types, all tagged `SanitizerType::Numeric`.
///
/// Each entry has an empty receiver, so it is matched as a raw substring of
/// the node text by `member_patterns_match`.
static RUST_AST_SANITIZERS: &[AstSanitizerPattern] = &[AstSanitizerPattern {
call_names: &[],
member_patterns: &[
("", ".parse::<i32>"),
("", ".parse::<i64>"),
("", ".parse::<u32>"),
("", ".parse::<u64>"),
("", ".parse::<f32>"),
("", ".parse::<f64>"),
("", ".parse::<usize>"),
("", ".parse::<isize>"),
],
sanitizer_type: SanitizerType::Numeric,
}];
/// Taint-source patterns for C, grouped by `TaintSourceType`.
///
/// Mostly bare libc function names in `call_names`; the final entry is an
/// empty-receiver substring (`"argv["`) for command-line argument indexing.
static C_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &["scanf", "fscanf", "sscanf", "fgets", "gets", "getchar"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &["getenv"],
member_patterns: &[],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &["fread", "fopen"],
member_patterns: &[],
source_type: TaintSourceType::FileRead,
},
AstSourcePattern {
call_names: &["recv", "recvfrom"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "argv[")],
source_type: TaintSourceType::UserInput,
},
];
/// Taint-sink patterns for C, grouped by `TaintSinkType`. All entries are
/// bare function names in `call_names`.
static C_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &["system", "popen", "execl", "execv", "execvp"],
member_patterns: &[],
sink_type: TaintSinkType::ShellExec,
},
// NOTE(review): unbounded formatting functions are reported as `ShellExec`,
// presumably as a stand-in for a buffer-overflow category — confirm.
AstSinkPattern {
call_names: &["sprintf", "vsprintf"],
member_patterns: &[],
sink_type: TaintSinkType::ShellExec,
},
// NOTE(review): string-copy functions are reported as `FileWrite`,
// presumably for lack of a memory-write sink type — confirm.
AstSinkPattern {
call_names: &["strcpy", "strcat", "strncpy"],
member_patterns: &[],
sink_type: TaintSinkType::FileWrite,
},
AstSinkPattern {
call_names: &["fopen", "open", "freopen"],
member_patterns: &[],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &["mysql_query", "PQexec", "sqlite3_exec"],
member_patterns: &[],
sink_type: TaintSinkType::SqlQuery,
},
];
/// Sanitizer patterns for C: numeric string conversions plus `snprintf`.
static C_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &["atoi", "atol", "atof", "strtol", "strtoul", "strtod"],
member_patterns: &[],
sanitizer_type: SanitizerType::Numeric,
},
// NOTE(review): `snprintf` bounds the output length but does not escape
// shell metacharacters; tagging it `Shell` looks like a pairing with the
// `sprintf` sink entry — confirm intended.
AstSanitizerPattern {
call_names: &["snprintf"],
member_patterns: &[],
sanitizer_type: SanitizerType::Shell,
},
];
/// Taint-source patterns for C++, grouped by `TaintSourceType`.
///
/// `std::`-qualified names in `member_patterns` have an empty receiver and
/// are matched as raw substrings of the node text.
static CPP_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &["getline"],
member_patterns: &[("", "std::cin"), ("", "std::getline")],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &["getenv", "std::getenv"],
member_patterns: &[],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "std::ifstream"), ("", "std::fstream")],
source_type: TaintSourceType::FileRead,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "argv[")],
source_type: TaintSourceType::UserInput,
},
];
/// Taint-sink patterns for C++, grouped by `TaintSinkType`.
///
/// Combines bare call names with empty-receiver substrings for
/// namespace-qualified types and functions.
static CPP_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &["system", "popen"],
member_patterns: &[("", "std::system")],
sink_type: TaintSinkType::ShellExec,
},
// NOTE(review): `sprintf` is reported as `ShellExec`, mirroring the C
// table — confirm the category is intended.
AstSinkPattern {
call_names: &["sprintf"],
member_patterns: &[],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &["fopen", "std::fopen", "std::freopen"],
member_patterns: &[("", "std::ifstream"), ("", "std::ofstream")],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "boost::archive::text_iarchive"),
("", "cereal::BinaryInputArchive"),
],
sink_type: TaintSinkType::Deserialize,
},
AstSinkPattern {
call_names: &["mysql_query", "PQexec", "sqlite3_exec"],
member_patterns: &[],
sink_type: TaintSinkType::SqlQuery,
},
];
/// Sanitizer patterns for C++: `std::sto*` conversions and `static_cast`s to
/// arithmetic types, all tagged `SanitizerType::Numeric`.
static CPP_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &[
"std::stoi",
"std::stol",
"std::stoul",
"std::stoll",
"std::stof",
"std::stod",
],
member_patterns: &[],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &[
"static_cast<int>",
"static_cast<long>",
"static_cast<float>",
"static_cast<double>",
],
member_patterns: &[],
sanitizer_type: SanitizerType::Numeric,
},
];
/// Taint-source patterns for Ruby, grouped by `TaintSourceType`.
///
/// `"params["` and `"ENV["` have an empty receiver and are matched as raw
/// substrings of the node text by `member_patterns_match`.
static RUBY_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &[],
member_patterns: &[
("STDIN", "read"),
("STDIN", "gets"),
("STDIN", "readline"),
],
source_type: TaintSourceType::Stdin,
},
AstSourcePattern {
call_names: &["gets"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "params[")],
source_type: TaintSourceType::HttpParam,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "ENV[")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("File", "read"), ("File", "open")],
source_type: TaintSourceType::FileRead,
},
];
/// Taint-sink patterns for Ruby, grouped by `TaintSinkType`.
///
/// Uses bare call names, exact `(receiver, field)` pairs, wildcard receivers
/// (`"*"`), and empty-receiver substrings for `::`-qualified constants.
static RUBY_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &["eval"],
member_patterns: &[],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &["system", "exec"],
member_patterns: &[],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("IO", "popen")],
sink_type: TaintSinkType::ShellExec,
},
// Any `<receiver>.send` is treated as dynamic dispatch / code evaluation.
AstSinkPattern {
call_names: &[],
member_patterns: &[("*", "send")],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &["raw"],
member_patterns: &[("*", "html_safe"), ("", "render html:")],
sink_type: TaintSinkType::HtmlOutput,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("File", "open"),
("File", "read"),
("File", "write"),
("", "Pathname.new("),
],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &["open"],
member_patterns: &[
("", "Net::HTTP.get"),
("", "Net::HTTP.post"),
("", "Net::HTTP.start"),
("", "URI.open"),
("", "URI.parse"),
("", "RestClient.get"),
("", "RestClient.post"),
("", "HTTParty.get"),
],
sink_type: TaintSinkType::HttpRequest,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("Marshal", "load"),
("YAML", "load"),
("Psych", "load"),
],
sink_type: TaintSinkType::Deserialize,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "ActiveRecord::Base.connection.execute"),
("", "raw_sql("),
("connection", "execute"),
],
sink_type: TaintSinkType::SqlQuery,
},
];
/// Sanitizer patterns for Ruby: `to_i`/`to_f` on any receiver (numeric) and
/// HTML-escaping helpers matched as raw substrings.
static RUBY_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("*", "to_i"), ("*", "to_f")],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &[],
member_patterns: &[
("", "CGI.escapeHTML"),
("", "Rack::Utils.escape_html"),
],
sanitizer_type: SanitizerType::Html,
},
];
/// Taint-source patterns for Kotlin, grouped by `TaintSourceType`.
///
/// The last group lists empty-receiver substrings of the form
/// `call.request...` / `call.parameters[` / `call.receive<`, which are
/// matched against the raw node text.
static KOTLIN_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &["readLine", "readln"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("System", "getenv")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "BufferedReader")],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("request", "getParameter")],
source_type: TaintSourceType::HttpParam,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[
("", "call.request.queryParameters"),
("", "call.parameters["),
("", "call.receive<"),
],
source_type: TaintSourceType::HttpParam,
},
];
/// Taint-sink patterns for Kotlin, grouped by `TaintSinkType`.
///
/// Largely parallels the Java sink table (JVM APIs), using wildcard
/// receivers and empty-receiver substrings.
static KOTLIN_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "Runtime.getRuntime().exec"),
("", "ProcessBuilder"),
],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("*", "execute"),
("*", "executeQuery"),
("", "prepareStatement"),
],
sink_type: TaintSinkType::SqlQuery,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("Files", "readString"),
("Files", "writeString"),
("Paths", "get"),
("", "File("),
],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("*", "readObject"),
("", "ObjectInputStream("),
],
sink_type: TaintSinkType::Deserialize,
},
];
/// Sanitizer patterns for Kotlin: `toInt`/`toLong`/`toDouble`/`toFloat` on
/// any receiver, all tagged `SanitizerType::Numeric`.
static KOTLIN_AST_SANITIZERS: &[AstSanitizerPattern] = &[AstSanitizerPattern {
call_names: &[],
member_patterns: &[
("*", "toInt"),
("*", "toLong"),
("*", "toDouble"),
("*", "toFloat"),
],
sanitizer_type: SanitizerType::Numeric,
}];
/// Taint-source patterns for Swift, grouped by `TaintSourceType`.
static SWIFT_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &["readLine"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("ProcessInfo.processInfo", "environment")],
source_type: TaintSourceType::EnvVar,
},
// NOTE(review): `URLSession` (network) is grouped under `FileRead`, and
// `FileManager.default` matches any use of the shared file manager —
// confirm both are intended.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("FileManager", "default"),
("", "URLSession"),
],
source_type: TaintSourceType::FileRead,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "CommandLine.arguments")],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "request.query[")],
source_type: TaintSourceType::HttpParam,
},
];
/// Taint-sink patterns for Swift, grouped by `TaintSinkType`.
///
/// The file-open group consists entirely of empty-receiver substrings that
/// include the argument label (e.g. `"Data(contentsOf:"`).
static SWIFT_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &["system"],
member_patterns: &[
("", "Process()"),
("", "NSTask"),
("Process", "launchedProcess"),
("Process", "run"),
],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &["sqlite3_exec"],
member_patterns: &[("*", "executeQuery"), ("*", "prepareStatement")],
sink_type: TaintSinkType::SqlQuery,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "String(contentsOfFile:"),
("", "Data(contentsOf:"),
("", "FileManager.default.contents(atPath:"),
("", "FileHandle(forReadingAtPath:"),
("", "FileHandle(forWritingAtPath:"),
],
sink_type: TaintSinkType::FileOpen,
},
];
/// Sanitizer patterns for Swift: numeric initializers (`Int`, `Double`,
/// `Float`) and percent-encoding on any receiver.
static SWIFT_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &["Int", "Double", "Float"],
member_patterns: &[],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("*", "addingPercentEncoding")],
sanitizer_type: SanitizerType::Html,
},
];
/// Taint-source patterns for C#, grouped by `TaintSourceType`.
///
/// HTTP parameters are covered twice: as exact `(receiver, field)` pairs and
/// as empty-receiver substrings for indexer syntax (`Request.Query[`).
static CSHARP_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &[],
member_patterns: &[("Console", "ReadLine")],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("Request", "QueryString"), ("Request", "Form")],
source_type: TaintSourceType::HttpParam,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "Request.Query["), ("", "Request.Form[")],
source_type: TaintSourceType::HttpParam,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("Environment", "GetEnvironmentVariable")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[
("File", "ReadAllText"),
("File", "ReadAllLines"),
("File", "OpenRead"),
("", "StreamReader"),
],
source_type: TaintSourceType::FileRead,
},
];
/// Taint-sink patterns for C#, grouped by `TaintSinkType`.
///
/// Several APIs are listed both as an exact `(receiver, field)` pair and as
/// an empty-receiver substring (e.g. `Process.Start`), so they match via
/// either path in `member_patterns_match`.
static CSHARP_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[("Process", "Start"), ("", "Process.Start")],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "SqlCommand"),
("*", "ExecuteNonQuery"),
("*", "ExecuteReader"),
],
sink_type: TaintSinkType::SqlQuery,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("Activator", "CreateInstance")],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &["AppendHtml"],
member_patterns: &[
("Html", "Raw"),
("", "@Html.Raw("),
("Response", "Write"),
],
sink_type: TaintSinkType::HtmlOutput,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("File", "Open"),
("File", "ReadAllText"),
("File", "WriteAllText"),
("Path", "Combine"),
("", "System.IO.File.Open"),
],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("BinaryFormatter", "Deserialize"),
("NetDataContractSerializer", "Deserialize"),
("", "JavaScriptSerializer("),
("", "new XmlSerializer"),
("", "new SoapFormatter"),
],
sink_type: TaintSinkType::Deserialize,
},
];
/// Sanitizer patterns for C#: numeric parsing/conversion and
/// `HttpUtility.HtmlEncode`.
static CSHARP_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &[],
member_patterns: &[
("int", "Parse"),
("Convert", "ToInt32"),
("double", "Parse"),
],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("HttpUtility", "HtmlEncode")],
sanitizer_type: SanitizerType::Html,
},
];
/// Taint-source patterns for Scala, grouped by `TaintSourceType`.
static SCALA_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &[],
member_patterns: &[
("StdIn", "readLine"),
("", "scala.io.StdIn"),
],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("System", "getenv")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("Source", "fromFile")],
source_type: TaintSourceType::FileRead,
},
// NOTE(review): `request.body` is grouped under `HttpParam` rather than
// `HttpBody` — confirm intended.
AstSourcePattern {
call_names: &[],
member_patterns: &[
("request", "getQueryString"),
("request", "queryString"),
("request", "body"),
],
source_type: TaintSourceType::HttpParam,
},
];
/// Taint-sink patterns for Scala, grouped by `TaintSinkType`.
///
/// Note the Scala-style `Runtime.getRuntime.exec` substring (no parentheses
/// after `getRuntime`), which differs from the Java/Kotlin tables.
static SCALA_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "Runtime.getRuntime.exec"),
("sys", "process"),
("", "Process("),
],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("*", "execute"), ("*", "executeQuery")],
sink_type: TaintSinkType::SqlQuery,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("Source", "fromFile"),
("Files", "readString"),
("Files", "writeString"),
("Paths", "get"),
("", "scala.io.Source.fromFile"),
],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("*", "readObject"),
("", "ObjectInputStream("),
("", "new java.io.ObjectInputStream("),
],
sink_type: TaintSinkType::Deserialize,
},
];
/// Sanitizer patterns for Scala: `toInt`/`toLong`/`toDouble` on any receiver
/// (numeric) and `StringEscapeUtils.escapeHtml` (HTML).
static SCALA_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("*", "toInt"), ("*", "toLong"), ("*", "toDouble")],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("StringEscapeUtils", "escapeHtml")],
sanitizer_type: SanitizerType::Html,
},
];
/// Taint-source patterns for PHP, grouped by `TaintSourceType`.
///
/// Superglobal accesses (`$_GET[`, `$_POST[`, ...) use an empty receiver and
/// are matched as raw substrings of the node text.
static PHP_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &[],
member_patterns: &[
("", "$_GET["),
("", "$_REQUEST["),
("", "$_COOKIE["),
("", "$_SERVER["),
],
source_type: TaintSourceType::HttpParam,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("", "$_POST[")],
source_type: TaintSourceType::HttpBody,
},
AstSourcePattern {
call_names: &["fgets"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &["file_get_contents"],
member_patterns: &[],
source_type: TaintSourceType::FileRead,
},
AstSourcePattern {
call_names: &["getenv"],
member_patterns: &[("", "$_ENV[")],
source_type: TaintSourceType::EnvVar,
},
];
/// Taint-sink patterns for PHP, grouped by `TaintSinkType`.
///
/// Language constructs (`echo `, `print `, `include(`) and `->` method calls
/// are expressed as empty-receiver substrings of the node text.
static PHP_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &["eval"],
member_patterns: &[],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &[
"exec",
"system",
"passthru",
"shell_exec",
"popen",
"proc_open",
],
member_patterns: &[],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &["mysqli_query"],
member_patterns: &[("", "->query(")],
sink_type: TaintSinkType::SqlQuery,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("", "echo "),
("", "print "),
("", "<?= "),
],
sink_type: TaintSinkType::HtmlOutput,
},
AstSinkPattern {
call_names: &["fopen", "file_get_contents", "file_put_contents"],
member_patterns: &[
("", "include("),
("", "require("),
],
sink_type: TaintSinkType::FileOpen,
},
// NOTE(review): `fopen` and `file_get_contents` also appear in the
// `FileOpen` entry above; which sink type is reported for them depends on
// how the consumer walks this table (not visible here).
AstSinkPattern {
call_names: &[
"fopen",
"file_get_contents",
"curl_exec",
"curl_setopt",
"get_headers",
"readfile",
],
member_patterns: &[
("", "Guzzle\\Client"),
("", "->request("),
],
sink_type: TaintSinkType::HttpRequest,
},
AstSinkPattern {
call_names: &["unserialize", "yaml_parse"],
member_patterns: &[],
sink_type: TaintSinkType::Deserialize,
},
];
/// Sanitizer patterns for PHP: numeric conversions and casts, HTML escaping,
/// and `mysqli_real_escape_string`.
static PHP_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &["intval", "floatval"],
member_patterns: &[("", "(int)"), ("", "(float)")],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &["htmlspecialchars", "htmlentities"],
member_patterns: &[],
sanitizer_type: SanitizerType::Html,
},
// NOTE(review): `SanitizerType` has no SQL variant, so SQL escaping is
// tagged `Shell` here — confirm downstream logic treats this as intended.
AstSanitizerPattern {
call_names: &["mysqli_real_escape_string"],
member_patterns: &[],
sanitizer_type: SanitizerType::Shell,
},
];
/// Taint-source patterns for Lua; `get_ast_patterns` also returns this table
/// for Luau.
///
/// The `ngx.req.*` entries have an empty receiver and are matched as raw
/// substrings of the node text.
static LUA_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &[],
member_patterns: &[("io", "read")],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("os", "getenv")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("io", "open")],
source_type: TaintSourceType::FileRead,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[
("", "ngx.req.get_uri_args"),
("", "ngx.req.get_post_args"),
("", "ngx.req.get_headers"),
],
source_type: TaintSourceType::HttpParam,
},
];
/// Taint-sink patterns for Lua; `get_ast_patterns` also returns this table
/// for Luau.
///
/// Method-call SQL sinks use Lua's colon syntax (`":query("`) as
/// empty-receiver substrings.
static LUA_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[("os", "execute")],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("io", "popen")],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &["loadstring", "load", "dofile", "loadfile"],
member_patterns: &[],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("ngx", "say"), ("ngx", "print")],
sink_type: TaintSinkType::HtmlOutput,
},
// NOTE(review): `dofile` and `loadfile` are also listed under `CodeEval`
// above; which classification is reported depends on consumer lookup order
// (not visible here).
AstSinkPattern {
call_names: &["dofile", "loadfile"],
member_patterns: &[("io", "open")],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("", ":query("), ("", ":execute(")],
sink_type: TaintSinkType::SqlQuery,
},
];
/// Sanitizer patterns for Lua (and Luau, via `get_ast_patterns`): only
/// `tonumber`, tagged `SanitizerType::Numeric`.
static LUA_AST_SANITIZERS: &[AstSanitizerPattern] = &[AstSanitizerPattern {
call_names: &["tonumber"],
member_patterns: &[],
sanitizer_type: SanitizerType::Numeric,
}];
/// Taint-source patterns for Elixir, grouped by `TaintSourceType`.
///
/// `conn.*params[` accesses use an empty receiver and are matched as raw
/// substrings of the node text.
static ELIXIR_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &[],
member_patterns: &[("IO", "gets")],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("System", "get_env")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("File", "read"), ("File", "read!")],
source_type: TaintSourceType::FileRead,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[
("", "conn.params["),
("", "conn.body_params["),
("", "conn.query_params["),
],
source_type: TaintSourceType::HttpParam,
},
];
/// Taint-sink patterns for Elixir, grouped by `TaintSinkType`.
///
/// Erlang interop calls (`:os.cmd(`, `:erlang.binary_to_term(`) are
/// expressed as empty-receiver substrings of the node text.
static ELIXIR_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[
("System", "cmd"),
("System", "shell"),
("Port", "open"),
("", ":os.cmd("),
],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("Code", "eval_string")],
sink_type: TaintSinkType::CodeEval,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("Ecto.Adapters.SQL", "query"),
("Ecto.Adapters.SQL", "query!"),
("Repo", "query"),
("Repo", "query!"),
],
sink_type: TaintSinkType::SqlQuery,
},
AstSinkPattern {
call_names: &["raw"],
member_patterns: &[("Phoenix.HTML", "raw")],
sink_type: TaintSinkType::HtmlOutput,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("File", "read"),
("File", "read!"),
("File", "write"),
("File", "write!"),
("File", "open!"),
("File", "stream!"),
("Path", "join"),
],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("", ":erlang.binary_to_term(")],
sink_type: TaintSinkType::Deserialize,
},
];
/// Sanitizer patterns for Elixir: `String.to_integer`/`String.to_float`
/// (numeric) and `Phoenix.HTML.html_escape` (HTML), all matched as raw
/// substrings because their receiver is empty.
static ELIXIR_AST_SANITIZERS: &[AstSanitizerPattern] = &[
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("", "String.to_integer"), ("", "String.to_float")],
sanitizer_type: SanitizerType::Numeric,
},
AstSanitizerPattern {
call_names: &[],
member_patterns: &[("", "Phoenix.HTML.html_escape")],
sanitizer_type: SanitizerType::Html,
},
];
/// Taint-source patterns for OCaml, grouped by `TaintSourceType`.
///
/// Bare function names go in `call_names`; module-qualified functions are
/// `(module, function)` pairs in `member_patterns`.
static OCAML_AST_SOURCES: &[AstSourcePattern] = &[
AstSourcePattern {
call_names: &["read_line"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &["input_line"],
member_patterns: &[],
source_type: TaintSourceType::UserInput,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[("Sys", "getenv")],
source_type: TaintSourceType::EnvVar,
},
AstSourcePattern {
call_names: &[],
member_patterns: &[
("In_channel", "read_all"),
("In_channel", "input_all"),
],
source_type: TaintSourceType::FileRead,
},
];
/// Taint-sink patterns for OCaml, grouped by `TaintSinkType`.
///
/// All module-qualified entries are exact `(module, function)` pairs; there
/// are no wildcard or substring patterns in this table.
static OCAML_AST_SINKS: &[AstSinkPattern] = &[
AstSinkPattern {
call_names: &[],
member_patterns: &[("Sys", "command")],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[("Unix", "execvp")],
sink_type: TaintSinkType::ShellExec,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("Sqlite3", "exec"),
("Sqlite3", "prepare"),
("Mariadb.Stmt", "execute"),
("Postgresql", "exec"),
("Mysql", "exec"),
],
sink_type: TaintSinkType::SqlQuery,
},
AstSinkPattern {
call_names: &["open_in", "open_out"],
member_patterns: &[("Filename", "concat")],
sink_type: TaintSinkType::FileOpen,
},
AstSinkPattern {
call_names: &[],
member_patterns: &[
("Marshal", "from_channel"),
("Marshal", "from_string"),
],
sink_type: TaintSinkType::Deserialize,
},
];
/// Sanitizer patterns for OCaml: `int_of_string` and `float_of_string`,
/// tagged `SanitizerType::Numeric`.
static OCAML_AST_SANITIZERS: &[AstSanitizerPattern] = &[AstSanitizerPattern {
call_names: &["int_of_string", "float_of_string"],
member_patterns: &[],
sanitizer_type: SanitizerType::Numeric,
}];
fn get_ast_patterns(language: Language) -> AstLanguagePatterns {
match language {
Language::Python => AstLanguagePatterns {
sources: PYTHON_AST_SOURCES,
sinks: PYTHON_AST_SINKS,
sanitizers: PYTHON_AST_SANITIZERS,
},
Language::TypeScript | Language::JavaScript => AstLanguagePatterns {
sources: TYPESCRIPT_AST_SOURCES,
sinks: TYPESCRIPT_AST_SINKS,
sanitizers: TYPESCRIPT_AST_SANITIZERS,
},
Language::Go => AstLanguagePatterns {
sources: GO_AST_SOURCES,
sinks: GO_AST_SINKS,
sanitizers: GO_AST_SANITIZERS,
},
Language::Java => AstLanguagePatterns {
sources: JAVA_AST_SOURCES,
sinks: JAVA_AST_SINKS,
sanitizers: JAVA_AST_SANITIZERS,
},
Language::Rust => AstLanguagePatterns {
sources: RUST_AST_SOURCES,
sinks: RUST_AST_SINKS,
sanitizers: RUST_AST_SANITIZERS,
},
Language::C => AstLanguagePatterns {
sources: C_AST_SOURCES,
sinks: C_AST_SINKS,
sanitizers: C_AST_SANITIZERS,
},
Language::Cpp => AstLanguagePatterns {
sources: CPP_AST_SOURCES,
sinks: CPP_AST_SINKS,
sanitizers: CPP_AST_SANITIZERS,
},
Language::Ruby => AstLanguagePatterns {
sources: RUBY_AST_SOURCES,
sinks: RUBY_AST_SINKS,
sanitizers: RUBY_AST_SANITIZERS,
},
Language::Kotlin => AstLanguagePatterns {
sources: KOTLIN_AST_SOURCES,
sinks: KOTLIN_AST_SINKS,
sanitizers: KOTLIN_AST_SANITIZERS,
},
Language::Swift => AstLanguagePatterns {
sources: SWIFT_AST_SOURCES,
sinks: SWIFT_AST_SINKS,
sanitizers: SWIFT_AST_SANITIZERS,
},
Language::CSharp => AstLanguagePatterns {
sources: CSHARP_AST_SOURCES,
sinks: CSHARP_AST_SINKS,
sanitizers: CSHARP_AST_SANITIZERS,
},
Language::Scala => AstLanguagePatterns {
sources: SCALA_AST_SOURCES,
sinks: SCALA_AST_SINKS,
sanitizers: SCALA_AST_SANITIZERS,
},
Language::Php => AstLanguagePatterns {
sources: PHP_AST_SOURCES,
sinks: PHP_AST_SINKS,
sanitizers: PHP_AST_SANITIZERS,
},
Language::Lua | Language::Luau => AstLanguagePatterns {
sources: LUA_AST_SOURCES,
sinks: LUA_AST_SINKS,
sanitizers: LUA_AST_SANITIZERS,
},
Language::Elixir => AstLanguagePatterns {
sources: ELIXIR_AST_SOURCES,
sinks: ELIXIR_AST_SINKS,
sanitizers: ELIXIR_AST_SANITIZERS,
},
Language::Ocaml => AstLanguagePatterns {
sources: OCAML_AST_SOURCES,
sinks: OCAML_AST_SINKS,
sanitizers: OCAML_AST_SANITIZERS,
},
}
}
pub fn fastpath_pattern_strings(language: Language) -> &'static [&'static str] {
use std::sync::OnceLock;
macro_rules! fastpath_static {
($name:ident, $lang:expr) => {{
static CELL: OnceLock<Vec<&'static str>> = OnceLock::new();
CELL.get_or_init(|| build_fastpath_needles($lang))
.as_slice()
}};
}
match language {
Language::Python => fastpath_static!(PY, Language::Python),
Language::TypeScript => fastpath_static!(TS, Language::TypeScript),
Language::JavaScript => fastpath_static!(JS, Language::JavaScript),
Language::Go => fastpath_static!(GO, Language::Go),
Language::Java => fastpath_static!(JAVA, Language::Java),
Language::Rust => fastpath_static!(RUST, Language::Rust),
Language::C => fastpath_static!(C, Language::C),
Language::Cpp => fastpath_static!(CPP, Language::Cpp),
Language::Ruby => fastpath_static!(RB, Language::Ruby),
Language::Kotlin => fastpath_static!(KT, Language::Kotlin),
Language::Swift => fastpath_static!(SW, Language::Swift),
Language::CSharp => fastpath_static!(CS, Language::CSharp),
Language::Scala => fastpath_static!(SC, Language::Scala),
Language::Php => fastpath_static!(PHP, Language::Php),
Language::Lua => fastpath_static!(LUA, Language::Lua),
Language::Luau => fastpath_static!(LUAU, Language::Luau),
Language::Elixir => fastpath_static!(EX, Language::Elixir),
Language::Ocaml => fastpath_static!(OCAML, Language::Ocaml),
}
}
/// Builds the de-duplicated set of substring needles for `language`'s
/// source and sink tables.
///
/// For each pattern entry:
/// - every non-empty call name is a needle as-is;
/// - a member pattern with an empty receiver contributes its second element
///   verbatim;
/// - a `"*"` receiver contributes `".<field>"`;
/// - any other receiver contributes `"<receiver>.<field>"`.
///
/// Member patterns with an empty field are skipped. Sanitizer tables are not
/// consulted. The order of the returned vector is unspecified (it comes from
/// a `HashSet`).
///
/// Formatted needles are leaked to obtain `'static` lifetime; a needle is
/// only leaked when no equal string is already in the set, so patterns shared
/// between the source and sink tables do not leak twice.
fn build_fastpath_needles(language: Language) -> Vec<&'static str> {
    use std::collections::HashSet;

    /// Inserts a formatted needle, leaking it only if it is not yet present.
    fn insert_owned(set: &mut HashSet<&'static str>, needle: String) {
        if !set.contains(needle.as_str()) {
            set.insert(Box::leak(needle.into_boxed_str()));
        }
    }

    /// Adds the needles derived from one pattern entry's name lists.
    fn add_needles(
        set: &mut HashSet<&'static str>,
        call_names: &[&'static str],
        member_patterns: &[(&'static str, &'static str)],
    ) {
        set.extend(call_names.iter().copied().filter(|name| !name.is_empty()));

        for &(receiver, field) in member_patterns {
            if field.is_empty() {
                continue;
            }
            match receiver {
                // Empty receiver: the field is already a raw substring.
                "" => {
                    set.insert(field);
                }
                // Wildcard receiver: only the `.field` suffix is known.
                "*" => insert_owned(set, format!(".{}", field)),
                _ => insert_owned(set, format!("{}.{}", receiver, field)),
            }
        }
    }

    let patterns = get_ast_patterns(language);
    let mut set: HashSet<&'static str> = HashSet::new();

    for src in patterns.sources {
        add_needles(&mut set, src.call_names, src.member_patterns);
    }
    for sink in patterns.sinks {
        add_needles(&mut set, sink.call_names, sink.member_patterns);
    }

    set.into_iter().collect()
}
/// Reports whether `body_text` contains at least one fast-path needle for
/// `language`.
///
/// This is a plain substring scan over the list returned by
/// `fastpath_pattern_strings`; it stops at the first needle found.
pub fn function_body_has_taint_pattern(body_text: &str, language: Language) -> bool {
    for &needle in fastpath_pattern_strings(language) {
        if body_text.contains(needle) {
            return true;
        }
    }
    false
}
fn member_patterns_match(
descendant: &tree_sitter::Node,
source: &[u8],
language: Language,
member_patterns: &[(&str, &str)],
descendant_text: &str,
) -> bool {
if let Some((rcv, field)) =
extract_member_access_receiver_and_field(descendant, source, language)
{
for (pat_rcv, pat_field) in member_patterns {
if pat_rcv.is_empty() {
continue; }
if *pat_rcv == "*" {
if field == *pat_field {
return true;
}
} else if rcv == *pat_rcv && field == *pat_field {
return true;
}
}
}
let call_kinds = call_node_kinds(language);
if call_kinds.contains(&descendant.kind()) {
if let Some(call_name) = extract_call_name(descendant, source, language) {
if let Some(dot_pos) = call_name.rfind('.') {
let rcv = &call_name[..dot_pos];
let field = &call_name[dot_pos + 1..];
for (pat_rcv, pat_field) in member_patterns {
if pat_rcv.is_empty() {
continue;
}
if *pat_rcv == "*" {
if field == *pat_field {
return true;
}
} else if rcv == *pat_rcv && field == *pat_field {
return true;
}
}
}
}
}
for (pat_rcv, pat_field) in member_patterns {
if pat_rcv.is_empty() && descendant_text.contains(pat_field) {
return true;
}
}
false
}
/// Returns the name of the first identifier-like argument found under
/// `descendant`, or `None` when nothing qualifies.
///
/// Four language/node-kind special cases are handled first (PHP
/// `echo_statement`/`print_intrinsic`, Ruby `subshell`, C++ `declaration`,
/// OCaml `application_expression`); each of them returns directly and never
/// falls through to the generic path. The generic path looks for an argument
/// container on the node and inspects its named children in order.
///
/// Nodes whose kind is listed by `string_node_kinds(language)` are skipped
/// everywhere, and candidate text is accepted only if `is_valid_identifier`
/// approves it.
fn extract_first_identifier_arg_ast(
descendant: &tree_sitter::Node,
source: &[u8],
language: Language,
) -> Option<String> {
let string_kinds = string_node_kinds(language);
// PHP `echo ...` / `print ...`: walk the subtree looking for a variable node.
if language == Language::Php
&& matches!(
descendant.kind(),
"echo_statement" | "print_intrinsic"
)
{
// Explicit stack; children are pushed in order and popped LIFO, so later
// siblings are visited before earlier ones.
let mut stack: Vec<tree_sitter::Node> = vec![*descendant];
while let Some(node) = stack.pop() {
// Do not look inside string literals.
if string_kinds.contains(&node.kind()) {
continue;
}
if matches!(node.kind(), "variable_name" | "name") && node.id() != descendant.id() {
let text = node_text(&node, source);
// Drop the `$` sigil and keep only the part before the first `.` / `->`.
let head = text.trim_start_matches('$');
let head = head.split('.').next().unwrap_or(head);
let head = head.split("->").next().unwrap_or(head);
if is_valid_identifier(head) {
return Some(head.to_string());
}
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.is_named() {
stack.push(child);
}
}
}
}
return None;
}
// Ruby subshell node: same walk, but looking for `identifier` nodes.
if language == Language::Ruby && descendant.kind() == "subshell" {
let mut stack: Vec<tree_sitter::Node> = vec![*descendant];
while let Some(node) = stack.pop() {
if string_kinds.contains(&node.kind()) {
continue;
}
if node.kind() == "identifier" && node.id() != descendant.id() {
let text = node_text(&node, source);
let head = text.split('.').next().unwrap_or(text);
if is_valid_identifier(head) {
return Some(head.to_string());
}
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.is_named() {
stack.push(child);
}
}
}
}
return None;
}
// C++ declaration: search the `value` field of each `init_declarator` child.
if language == Language::Cpp && descendant.kind() == "declaration" {
for i in 0..descendant.child_count() {
let Some(init_decl) = descendant.child(i) else {
continue;
};
if !init_decl.is_named() || init_decl.kind() != "init_declarator" {
continue;
}
if let Some(value) = init_decl.child_by_field_name("value") {
if let Some(found) =
extract_first_identifier_arg_ast_descent(&value, source, language, 0)
{
return Some(found);
}
}
}
return None;
}
// OCaml application: child 0 is skipped; the remaining named children are
// examined in order.
if language == Language::Ocaml && descendant.kind() == "application_expression" {
for i in 1..descendant.child_count() {
let Some(child) = descendant.child(i) else {
continue;
};
if !child.is_named() {
continue;
}
if string_kinds.contains(&child.kind()) {
continue;
}
let text = node_text(&child, source).trim();
if text.is_empty() {
continue;
}
// Strip surrounding parentheses, keep the part before the first `.`,
// then drop any leading `&`.
let stripped = text
.trim_start_matches('(')
.trim_end_matches(')')
.trim();
let head = stripped.split('.').next().unwrap_or(stripped);
let head = head.trim_start_matches('&');
if is_valid_identifier(head) {
return Some(head.to_string());
}
}
return None;
}
// Generic path: prefer the `arguments` field, otherwise the first child whose
// kind contains "argument" or equals "call_suffix". `?` yields `None` when
// neither exists.
let args = descendant
.child_by_field_name("arguments")
.or_else(|| {
for i in 0..descendant.child_count() {
if let Some(child) = descendant.child(i) {
let kind = child.kind();
if kind.contains("argument") || kind == "call_suffix" {
return Some(child);
}
}
}
None
})?;
// Node kinds that are searched recursively when the argument itself is not a
// plain identifier; empty for every language not listed.
let descend_kinds: &[&str] = match language {
Language::Java => &[
"object_creation_expression",
"method_invocation",
"parenthesized_expression",
],
Language::Scala => &[
"call_expression",
"instance_expression",
"infix_expression",
],
Language::Cpp => &[
"binary_expression",
"call_expression",
"parenthesized_expression",
"argument_list",
],
_ => &[],
};
for i in 0..args.child_count() {
let Some(child) = args.child(i) else {
continue;
};
if !child.is_named() {
continue;
}
if string_kinds.contains(&child.kind()) {
continue;
}
let text = node_text(&child, source).trim();
if !text.is_empty() {
// Keep the part before the first `.` and drop leading `&` / `$`.
let head = text.split('.').next().unwrap_or(text);
let head = head.trim_start_matches('&').trim_start_matches('$');
if is_valid_identifier(head) {
return Some(head.to_string());
}
}
if descend_kinds.contains(&child.kind()) {
if let Some(found) =
extract_first_identifier_arg_ast_descent(&child, source, language, 0)
{
return Some(found);
}
}
}
None
}
/// Depth-limited, left-to-right pre-order search below `node` for the first
/// named, non-string descendant whose text reduces to a valid identifier.
///
/// The text of each visited node is trimmed, cut at its first `.`, and
/// stripped of leading `&` and `$` before being validated with
/// `is_valid_identifier`. `depth` is the level of `node` itself; children are
/// one level deeper, and nothing at level `MAX_DEPTH` or beyond is examined.
fn extract_first_identifier_arg_ast_descent(
    node: &tree_sitter::Node,
    source: &[u8],
    language: Language,
    depth: u32,
) -> Option<String> {
    const MAX_DEPTH: u32 = 5;

    /// Pushes the named children of `parent` in reverse order so that the
    /// leftmost child is popped first.
    fn push_named_children_rev<'t>(
        stack: &mut Vec<(tree_sitter::Node<'t>, u32)>,
        parent: &tree_sitter::Node<'t>,
        child_depth: u32,
    ) {
        for idx in (0..parent.child_count()).rev() {
            if let Some(child) = parent.child(idx) {
                if child.is_named() {
                    stack.push((child, child_depth));
                }
            }
        }
    }

    if depth >= MAX_DEPTH {
        return None;
    }

    let string_kinds = string_node_kinds(language);
    let mut pending = Vec::new();
    push_named_children_rev(&mut pending, node, depth + 1);

    while let Some((visiting, level)) = pending.pop() {
        // Too deep, or a string literal: skip the node and its whole subtree.
        if level >= MAX_DEPTH || string_kinds.contains(&visiting.kind()) {
            continue;
        }
        let text = node_text(&visiting, source).trim();
        if !text.is_empty() {
            let head = text.split('.').next().unwrap_or(text);
            let head = head.trim_start_matches('&').trim_start_matches('$');
            if is_valid_identifier(head) {
                return Some(head.to_string());
            }
        }
        push_named_children_rev(&mut pending, &visiting, level + 1);
    }
    None
}
/// Extracts an identifier from the right-hand side of an assignment found in
/// `line_text`, scanning `=` signs from the end of the line backwards.
///
/// An `=` counts as an assignment only when it is not preceded by `=`, `!`,
/// `<` or `>` and not followed by `=`. For the text after it (leading `{`,
/// spaces and tabs removed) two shapes are tried: `key: value`, which yields
/// the leading word of `value` when `key` is a valid identifier, and
/// otherwise the leading word of the right-hand side itself.
///
/// `descendant` and `source` are accepted but not consulted.
fn extract_assignment_rhs_ident(
    descendant: &tree_sitter::Node,
    source: &[u8],
    line_text: &str,
) -> Option<String> {
    let _ = (descendant, source);

    /// Leading run of alphanumeric / underscore characters of `text`.
    fn leading_word(text: &str) -> &str {
        text.split(|c: char| !c.is_alphanumeric() && c != '_')
            .next()
            .unwrap_or("")
    }

    let bytes = line_text.as_bytes();
    for (pos, _) in line_text.rmatch_indices('=') {
        let before = if pos > 0 { bytes[pos - 1] } else { b' ' };
        let after = bytes.get(pos + 1).copied().unwrap_or(b' ');
        let is_plain_assignment =
            !matches!(before, b'=' | b'!' | b'<' | b'>') && after != b'=';
        if !is_plain_assignment {
            continue;
        }

        let rhs = line_text[pos + 1..].trim_start_matches(['{', ' ', '\t']);

        // `key: value` shape.
        if let Some((key, rest)) = rhs.split_once(':') {
            if is_valid_identifier(key.trim()) {
                let candidate = leading_word(rest.trim());
                if is_valid_identifier(candidate) {
                    return Some(candidate.to_string());
                }
            }
        }

        // Plain `lhs = ident ...` shape.
        let candidate = leading_word(rhs);
        if is_valid_identifier(candidate) {
            return Some(candidate.to_string());
        }
    }
    None
}
/// Detects taint sources by walking every descendant of `root`.
///
/// A node matches a source pattern when it is a call whose name equals one of
/// the pattern's call names (or ends with `.<name>`), or when
/// `member_patterns_match` accepts it. Nodes inside comments or string
/// literals are ignored, and when `line_filter` is set only nodes starting on
/// that 1-based line are considered.
///
/// The tainted variable name is resolved through a chain of fallbacks: parent
/// assignment target, assigned variable in the line text, first identifier
/// argument, variable from the statement text, and finally the head of the
/// call name. A node contributes at most one source (the first pattern that
/// yields a variable). For Java, the Spring parameter-annotation sources are
/// appended.
///
/// Loop-invariant work is done once up front: the source buffer is decoded and
/// split into lines a single time, and each node's call name is extracted at
/// most once rather than per pattern name.
pub fn detect_sources_ast(
    root: &tree_sitter::Node,
    source: &[u8],
    language: Language,
    line_filter: Option<u32>,
) -> Vec<TaintSource> {
    let patterns = get_ast_patterns(language);
    let call_kinds = call_node_kinds(language);
    // Invalid UTF-8 degrades to "no line text", as before.
    let source_lines: Vec<&str> = std::str::from_utf8(source)
        .unwrap_or("")
        .lines()
        .collect();
    let mut sources = Vec::new();
    let descendants = walk_descendants(*root);
    for descendant in &descendants {
        if is_in_comment(descendant, language) || is_in_string(descendant, language) {
            continue;
        }
        let line = descendant.start_position().row as u32 + 1;
        if line_filter.is_some_and(|wanted| wanted != line) {
            continue;
        }
        let text = node_text(descendant, source);
        // Only call nodes have a call name; extract it once per node.
        let call_name = if call_kinds.contains(&descendant.kind()) {
            extract_call_name(descendant, source, language)
        } else {
            None
        };
        let line_text = source_lines
            .get((line - 1) as usize)
            .copied()
            .unwrap_or("");
        for pattern in patterns.sources {
            let call_hit = call_name.as_ref().is_some_and(|called| {
                pattern.call_names.iter().any(|name| {
                    // Exact name, or a qualified name ending in `.<name>`.
                    *called == *name
                        || called
                            .strip_suffix(*name)
                            .is_some_and(|prefix| prefix.ends_with('.'))
                })
            });
            let matched = call_hit
                || member_patterns_match(descendant, source, language, pattern.member_patterns, text);
            if !matched {
                continue;
            }
            let var = find_parent_assignment_var(descendant, source, language)
                .or_else(|| extract_assigned_var(line_text))
                .or_else(|| extract_first_identifier_arg_ast(descendant, source, language))
                .or_else(|| extract_source_var_from_statement(line_text))
                .or_else(|| {
                    // Last resort: the receiver / head segment of the call name.
                    call_name
                        .as_ref()
                        .and_then(|name| name.split('.').next().map(|s| s.to_string()))
                        .filter(|s| is_valid_identifier(s))
                });
            if let Some(var) = var {
                sources.push(TaintSource {
                    var,
                    line,
                    source_type: pattern.source_type,
                    statement: Some(line_text.to_string()),
                });
                // One source per node: stop at the first pattern that produced one.
                break;
            }
        }
    }
    if language == Language::Java {
        sources.extend(detect_spring_annotation_sources(root, source, line_filter));
    }
    sources
}
/// Reports `formal_parameter` nodes that carry one of the Spring request
/// annotations as taint sources.
///
/// `RequestBody` maps to `TaintSourceType::HttpBody`; `RequestParam`,
/// `PathVariable`, `RequestHeader` and `ModelAttribute` map to
/// `TaintSourceType::HttpParam`. Parameters without a recognised annotation or
/// without an identifier are skipped. When `line_filter` is set, only
/// parameters starting on that 1-based line are considered.
fn detect_spring_annotation_sources(
    root: &tree_sitter::Node,
    source: &[u8],
    line_filter: Option<u32>,
) -> Vec<TaintSource> {
    let mut found = Vec::new();
    let nodes = walk_descendants(*root);
    for parameter in &nodes {
        if parameter.kind() != "formal_parameter" {
            continue;
        }
        let line = parameter.start_position().row as u32 + 1;
        if matches!(line_filter, Some(wanted) if wanted != line) {
            continue;
        }
        let Some(annotation) = find_first_spring_annotation(parameter, source) else {
            continue;
        };
        let Some(var) = formal_parameter_identifier(parameter, source) else {
            continue;
        };
        let source_type = match annotation.as_str() {
            "RequestBody" => TaintSourceType::HttpBody,
            "RequestParam" | "PathVariable" | "RequestHeader" | "ModelAttribute" => {
                TaintSourceType::HttpParam
            }
            _ => continue,
        };
        // Text of the line the parameter starts on, if it can be decoded.
        let statement = std::str::from_utf8(source)
            .unwrap_or("")
            .lines()
            .nth((line - 1) as usize)
            .map(str::to_string);
        found.push(TaintSource {
            var,
            line,
            source_type,
            statement,
        });
    }
    found
}
/// Unqualified annotation names that `find_first_spring_annotation` accepts
/// when scanning a formal parameter. Names are compared after the leading `@`,
/// any argument list, and any package qualifier have been removed.
const SPRING_ANNOT_NAMES: &[&str] = &[
"RequestParam",
"PathVariable",
"RequestHeader",
"ModelAttribute",
"RequestBody",
];
/// Finds, in pre-order, the first `marker_annotation` / `annotation` node at or
/// below `param` whose unqualified name is listed in `SPRING_ANNOT_NAMES`, and
/// returns that name.
///
/// The name is taken from the node text by dropping leading `@` characters,
/// cutting at the first `(` or whitespace, and keeping the segment after the
/// last `.`.
fn find_first_spring_annotation(param: &tree_sitter::Node, source: &[u8]) -> Option<String> {
    fn search(node: &tree_sitter::Node, source: &[u8]) -> Option<String> {
        if matches!(node.kind(), "marker_annotation" | "annotation") {
            let qualified = node_text(node, source)
                .trim_start_matches('@')
                .split(|c: char| c == '(' || c.is_whitespace())
                .next()
                .unwrap_or("");
            let simple = qualified.rsplit('.').next().unwrap_or("");
            if SPRING_ANNOT_NAMES.contains(&simple) {
                return Some(simple.to_string());
            }
        }
        // Not a recognised annotation: keep looking in the children, in order.
        let mut cursor = node.walk();
        let hit = node
            .children(&mut cursor)
            .find_map(|child| search(&child, source));
        hit
    }

    search(param, source)
}
/// Returns the text of the last direct child of `param` whose kind is
/// `identifier`, or `None` when there is no such child.
fn formal_parameter_identifier(param: &tree_sitter::Node, source: &[u8]) -> Option<String> {
    let mut cursor = param.walk();
    let name = param
        .children(&mut cursor)
        .filter(|child| child.kind() == "identifier")
        .last()
        .map(|child| node_text(&child, source).to_string());
    name
}
/// Detects taint sinks by walking every descendant of `root`.
///
/// A node matches a sink pattern when it is a call whose name equals one of
/// the pattern's call names (or ends with `.<name>`), or when
/// `member_patterns_match` accepts it. Nodes inside comments or string
/// literals are ignored, and when `line_filter` is set only nodes starting on
/// that 1-based line are considered. Unlike source detection, a node may
/// produce one sink per matching pattern.
///
/// Two extra, pattern-independent rules are applied per node: a Ruby
/// `subshell` node is reported as `ShellExec`, and a Python `return_statement`
/// whose returned string contains an interpolation is reported as
/// `HtmlOutput`.
///
/// Every sink is returned with `tainted: false`; that flag is set elsewhere.
pub fn detect_sinks_ast(
root: &tree_sitter::Node,
source: &[u8],
language: Language,
line_filter: Option<u32>,
) -> Vec<TaintSink> {
let patterns = get_ast_patterns(language);
let mut sinks = Vec::new();
let descendants = walk_descendants(*root);
for descendant in &descendants {
if is_in_comment(descendant, language) || is_in_string(descendant, language) {
continue;
}
// tree-sitter rows are 0-based; reported lines are 1-based.
let line = descendant.start_position().row as u32 + 1;
if let Some(filter) = line_filter {
if line != filter {
continue;
}
}
let text = node_text(descendant, source);
for pattern in patterns.sinks {
let matched = pattern.call_names.iter().any(|name| {
let call_kinds = call_node_kinds(language);
if call_kinds.contains(&descendant.kind()) {
if let Some(call_name) = extract_call_name(descendant, source, language) {
return call_name == *name || call_name.ends_with(&format!(".{}", name));
}
}
false
}) || member_patterns_match(descendant, source, language, pattern.member_patterns, text);
if matched {
// Text of the line the node starts on ("" if the source is not valid UTF-8).
let stmt_text = std::str::from_utf8(source)
.unwrap_or("")
.lines()
.nth((line - 1) as usize)
.unwrap_or("");
// Resolve the sink variable through a chain of fallbacks: the first regex
// sink pattern matching the line (via `extract_call_arg`, then via
// `extract_sink_var_from_statement`), the first identifier argument in the
// AST, an assignment right-hand side, and finally the head of the call name.
let regex_patterns = get_patterns(language);
let var = regex_patterns
.sinks
.iter()
.find(|(p, _)| p.is_match(stmt_text))
.and_then(|(p, _)| extract_call_arg(stmt_text, p))
.or_else(|| {
regex_patterns
.sinks
.iter()
.find(|(p, _)| p.is_match(stmt_text))
.and_then(|(p, _)| extract_sink_var_from_statement(stmt_text, p))
})
.or_else(|| extract_first_identifier_arg_ast(descendant, source, language))
.or_else(|| extract_assignment_rhs_ident(descendant, source, stmt_text))
.or_else(|| {
let call_kinds = call_node_kinds(language);
if call_kinds.contains(&descendant.kind()) {
extract_call_name(descendant, source, language)
.and_then(|name| {
name.split('.').next().map(|s| s.to_string())
})
.filter(|s| is_valid_identifier(s))
} else {
None
}
});
if let Some(var) = var {
sinks.push(TaintSink {
var,
line,
sink_type: pattern.sink_type,
tainted: false,
statement: Some(stmt_text.to_string()),
});
}
}
}
// Ruby subshell node: reported as a shell-execution sink regardless of the
// pattern tables.
if language == Language::Ruby && descendant.kind() == "subshell" {
let stmt_text = std::str::from_utf8(source)
.unwrap_or("")
.lines()
.nth((line - 1) as usize)
.unwrap_or("");
let var = extract_first_identifier_arg_ast(descendant, source, language)
.or_else(|| extract_assignment_rhs_ident(descendant, source, stmt_text))
.or_else(|| extract_source_var_from_statement(stmt_text));
if let Some(var) = var {
sinks.push(TaintSink {
var,
line,
sink_type: TaintSinkType::ShellExec,
tainted: false,
statement: Some(stmt_text.to_string()),
});
continue;
}
}
// Python `return` of a string with an interpolation: reported as an HTML
// output sink on the first identifier found inside the first interpolation.
if language == Language::Python && descendant.kind() == "return_statement" {
// First direct named child of kind `string`.
let mut returned_string: Option<tree_sitter::Node> = None;
for i in 0..descendant.child_count() {
if let Some(child) = descendant.child(i) {
if child.is_named() && child.kind() == "string" {
returned_string = Some(child);
break;
}
}
}
if let Some(string_node) = returned_string {
// First direct named child of kind `interpolation`.
let mut first_interp: Option<tree_sitter::Node> = None;
for i in 0..string_node.child_count() {
if let Some(child) = string_node.child(i) {
if child.is_named() && child.kind() == "interpolation" {
first_interp = Some(child);
break;
}
}
}
if let Some(interp) = first_interp {
// Stack-based walk of the interpolation; children are popped LIFO, so
// later siblings are visited before earlier ones.
let mut stack: Vec<tree_sitter::Node> = vec![interp];
let mut var_name: Option<String> = None;
while let Some(node) = stack.pop() {
if node.kind() == "identifier" {
let text = node_text(&node, source);
let head = text.split('.').next().unwrap_or(text);
if is_valid_identifier(head) {
var_name = Some(head.to_string());
break;
}
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.is_named() {
stack.push(child);
}
}
}
}
if let Some(var) = var_name {
let stmt_text = std::str::from_utf8(source)
.unwrap_or("")
.lines()
.nth((line - 1) as usize)
.unwrap_or("");
sinks.push(TaintSink {
var,
line,
sink_type: TaintSinkType::HtmlOutput,
tainted: false,
statement: Some(stmt_text.to_string()),
});
continue;
}
}
}
}
}
sinks
}
/// Returns the sanitizer type of the first sanitizer pattern matched by any
/// node that starts on the 1-based `line`, or `None` when nothing matches.
///
/// Nodes inside comments or string literals are ignored. A node matches a
/// pattern when it is a call whose name is exactly one of the pattern's call
/// names, or when `member_patterns_match` accepts it.
pub fn detect_sanitizer_ast(
    root: &tree_sitter::Node,
    source: &[u8],
    language: Language,
    line: u32,
) -> Option<SanitizerType> {
    let patterns = get_ast_patterns(language);
    let call_kinds = call_node_kinds(language);
    let nodes = walk_descendants(*root);
    for node in &nodes {
        if is_in_comment(node, language) || is_in_string(node, language) {
            continue;
        }
        if node.start_position().row as u32 + 1 != line {
            continue;
        }
        let text = node_text(node, source);
        // Only call nodes have a call name to compare against.
        let called = if call_kinds.contains(&node.kind()) {
            extract_call_name(node, source, language)
        } else {
            None
        };
        for pattern in patterns.sanitizers {
            let name_hit = called
                .as_ref()
                .is_some_and(|c| pattern.call_names.iter().any(|name| *c == *name));
            if name_hit
                || member_patterns_match(node, source, language, pattern.member_patterns, text)
            {
                return Some(pattern.sanitizer_type);
            }
        }
    }
    None
}
/// Builds a map from 1-based line number to the sanitizer type detected on a
/// node starting at that line.
///
/// Every descendant of the tree root outside comments and string literals is
/// tested against the sanitizer patterns; the first matching pattern per node
/// is recorded. When several nodes on one line match, the entry written last
/// wins. Member patterns are matched against the node text with string
/// literal descendants blanked out.
fn build_sanitizer_ast_index(
    tree: &tree_sitter::Tree,
    source: &[u8],
    language: Language,
) -> HashMap<u32, SanitizerType> {
    let patterns = get_ast_patterns(language);
    let call_kinds = call_node_kinds(language);
    let nodes = walk_descendants(tree.root_node());
    let mut by_line: HashMap<u32, SanitizerType> = HashMap::new();
    for node in &nodes {
        if is_in_comment(node, language) || is_in_string(node, language) {
            continue;
        }
        let masked = mask_string_literal_descendants(node, source, language);
        let called = if call_kinds.contains(&node.kind()) {
            extract_call_name(node, source, language)
        } else {
            None
        };
        for pattern in patterns.sanitizers {
            let name_hit = called
                .as_ref()
                .is_some_and(|c| pattern.call_names.iter().any(|name| *c == *name));
            if name_hit
                || member_patterns_match(node, source, language, pattern.member_patterns, &masked)
            {
                by_line.insert(node.start_position().row as u32 + 1, pattern.sanitizer_type);
                break;
            }
        }
    }
    by_line
}
/// Returns the text of `descendant` with the byte range of every string
/// literal node found by `walk_descendants` overwritten by ASCII spaces.
///
/// Falls back to the unmodified node text when the node's byte range is empty
/// or out of bounds, or when the masked bytes are not valid UTF-8.
fn mask_string_literal_descendants(
    descendant: &tree_sitter::Node,
    source: &[u8],
    language: Language,
) -> String {
    let (start, end) = (descendant.start_byte(), descendant.end_byte());
    if end <= start || end > source.len() {
        return node_text(descendant, source).to_string();
    }

    let string_kinds = string_node_kinds(language);
    let mut masked: Vec<u8> = source[start..end].to_vec();
    for literal in walk_descendants(*descendant) {
        if !string_kinds.contains(&literal.kind()) {
            continue;
        }
        let (lit_start, lit_end) = (literal.start_byte(), literal.end_byte());
        // Ignore literals that do not overlap the node's own range.
        if lit_end <= start || lit_start >= end {
            continue;
        }
        // Translate to offsets inside `masked`, clamped to its bounds.
        let from = lit_start.saturating_sub(start);
        let to = lit_end.saturating_sub(start).min(masked.len());
        masked[from..to].fill(b' ');
    }

    match String::from_utf8(masked) {
        Ok(text) => text,
        Err(_) => node_text(descendant, source).to_string(),
    }
}
/// Runs the taint analysis for one function and returns the populated
/// `TaintInfo`.
///
/// Phases, in order:
/// 1. validate `cfg` and build predecessor / successor / line-to-block maps;
/// 2. collect sources and sinks, using the AST detectors when both `tree` and
///    `source` are given and the statement-text detectors otherwise;
/// 3. sort and de-duplicate sources and sinks;
/// 4. propagate taint per block, through `ssa_propagate` when `ssa` is present
///    with at least one block, otherwise through a worklist over variable
///    names;
/// 5. mark sinks as tainted;
/// 6. emit a `TaintFlow` for every (source, tainted sink) pair that is
///    connected, unsanitized and causally ordered.
///
/// # Errors
/// Returns the error produced by `validate_cfg` when the CFG is rejected.
pub fn compute_taint_with_tree(
cfg: &CfgInfo,
refs: &[VarRef],
statements: &HashMap<u32, String>,
tree: Option<&tree_sitter::Tree>,
source: Option<&[u8]>,
language: Language,
ssa: Option<&SsaFunction>,
) -> Result<TaintInfo, TldrError> {
validate_cfg(cfg)?;
let mut result = TaintInfo::new(&cfg.function);
let predecessors = build_predecessors(cfg);
let successors = build_successors(cfg);
let line_to_block = build_line_to_block(cfg);
let refs_by_block = build_refs_by_block(refs, &line_to_block);
// Line -> sanitizer lookup; empty when no tree/source pair is available.
let sanitizer_ast_index: HashMap<u32, SanitizerType> =
if let (Some(t), Some(s)) = (tree, source) {
build_sanitizer_ast_index(t, s, language)
} else {
HashMap::new()
};
if let (Some(tree), Some(src)) = (tree, source) {
let root = tree.root_node();
// Run each AST detector once over the whole tree, then bucket by line.
let all_ast_sources = detect_sources_ast(&root, src, language, None);
let all_ast_sinks = detect_sinks_ast(&root, src, language, None);
let mut ast_sources_by_line: HashMap<u32, Vec<TaintSource>> = HashMap::new();
for s in all_ast_sources {
ast_sources_by_line.entry(s.line).or_default().push(s);
}
let mut ast_sinks_by_line: HashMap<u32, Vec<TaintSink>> = HashMap::new();
for s in all_ast_sinks {
ast_sinks_by_line.entry(s.line).or_default().push(s);
}
// Only lines present in `statements` are consumed. For sources, AST hits
// replace `detect_sources` on that line; for sinks, AST hits and
// `detect_sinks` results are combined (duplicates are removed below).
for (&line, stmt) in statements {
if let Some(sources) = ast_sources_by_line.remove(&line) {
result.sources.extend(sources);
} else {
result.sources.extend(detect_sources(stmt, line, language));
}
if let Some(sinks) = ast_sinks_by_line.remove(&line) {
result.sinks.extend(sinks);
}
result.sinks.extend(detect_sinks(stmt, line, language));
}
} else {
for (&line, stmt) in statements {
result.sources.extend(detect_sources(stmt, line, language));
result.sinks.extend(detect_sinks(stmt, line, language));
}
}
// Sort by (line, type, var) so that `dedup_by` sees equal entries adjacently.
result.sources.sort_by(|a, b| {
a.line
.cmp(&b.line)
.then_with(|| format!("{:?}", a.source_type).cmp(&format!("{:?}", b.source_type)))
.then_with(|| a.var.cmp(&b.var))
});
result.sources.dedup_by(|a, b| {
a.line == b.line
&& a.var == b.var
&& std::mem::discriminant(&a.source_type) == std::mem::discriminant(&b.source_type)
});
result.sinks.sort_by(|a, b| {
a.line
.cmp(&b.line)
.then_with(|| format!("{:?}", a.sink_type).cmp(&format!("{:?}", b.sink_type)))
.then_with(|| a.var.cmp(&b.var))
});
result.sinks.dedup_by(|a, b| {
a.line == b.line
&& a.var == b.var
&& std::mem::discriminant(&a.sink_type) == std::mem::discriminant(&b.sink_type)
});
// Per-block tainted variable names, seeded with the source variables of the
// block each source line belongs to.
let block_ids: Vec<usize> = cfg.blocks.iter().map(|b| b.id).collect();
let mut tainted: HashMap<usize, HashSet<String>> = HashMap::new();
for &bid in &block_ids {
tainted.insert(bid, HashSet::new());
}
for source in &result.sources {
if let Some(&block_id) = line_to_block.get(&source.line) {
tainted
.entry(block_id)
.or_default()
.insert(source.var.clone());
}
}
// Iteration budget: blocks x distinct referenced names (+10), capped at
// MAX_TAINT_ITERATIONS.
let unique_vars: HashSet<&str> = refs.iter().map(|r| r.name.as_str()).collect();
let computed_max = block_ids.len() * unique_vars.len().max(1) + 10;
let max_iterations = computed_max.min(MAX_TAINT_ITERATIONS);
let mut worklist: VecDeque<usize> = block_ids.iter().cloned().collect();
let mut iterations = 0;
let mut iteration_limit_reached = false;
let mut source_vars_by_block: HashMap<usize, HashSet<String>> = HashMap::new();
for source in &result.sources {
if let Some(&block_id) = line_to_block.get(&source.line) {
source_vars_by_block
.entry(block_id)
.or_default()
.insert(source.var.clone());
}
}
let mut sources_by_line: HashMap<u32, HashSet<String>> = HashMap::new();
for source in &result.sources {
sources_by_line
.entry(source.line)
.or_default()
.insert(source.var.clone());
}
// SSA propagation is used only when an SSA form with at least one block exists.
let ssa_active = ssa.is_some_and(|s| !s.blocks.is_empty());
let ssa_tainted_per_block: Option<HashMap<usize, HashSet<TaintKey>>> = if ssa_active {
let ssa_ref = ssa.expect("ssa_active implies Some");
let ctx = SsaPropagateCtx {
ssa: ssa_ref,
sources: &result.sources,
predecessors: &predecessors,
successors: &successors,
line_to_block: &line_to_block,
max_iterations,
sanitizer_ast_index: &sanitizer_ast_index,
};
let tainted_ssa = ssa_propagate(&ctx, &mut result.sanitized_vars);
// Project the SSA keys back to plain variable names; this replaces the
// seeded set of every block that the SSA result mentions.
for (block_id, taint_keys) in &tainted_ssa {
let str_set: HashSet<String> = taint_keys
.iter()
.filter_map(|k| match k {
TaintKey::Versioned(id) => ssa_ref
.ssa_names
.get(id.0 as usize)
.map(|n| n.variable.clone()),
TaintKey::Raw(s) => Some(s.clone()),
})
.collect();
tainted.insert(*block_id, str_set);
}
Some(tainted_ssa)
} else {
None
};
// Name-based worklist propagation, used only when SSA is not active.
if !ssa_active {
while let Some(block_id) = worklist.pop_front() {
if iterations >= max_iterations {
iteration_limit_reached = true;
break;
}
iterations += 1;
// Incoming taint = union of the predecessors' sets plus this block's own
// source variables.
let mut taint_in: HashSet<String> = predecessors
.get(&block_id)
.map(|preds| {
preds
.iter()
.flat_map(|p| tainted.get(p).cloned().unwrap_or_default())
.collect()
})
.unwrap_or_default();
if let Some(source_vars) = source_vars_by_block.get(&block_id) {
taint_in.extend(source_vars.clone());
}
let taint_out = process_block(
block_id,
taint_in,
&refs_by_block,
&sources_by_line,
&mut result.sanitized_vars,
&sanitizer_ast_index,
);
// Re-queue successors only when this block's result changed.
let old_taint = tainted.get(&block_id).cloned().unwrap_or_default();
if taint_out != old_taint {
tainted.insert(block_id, taint_out);
if let Some(succs) = successors.get(&block_id) {
for &s in succs {
if !worklist.contains(&s) {
worklist.push_back(s);
}
}
}
}
}
}
// Only the name-based loop above can set this flag.
if iteration_limit_reached {
result.convergence = Some("iteration_limit_reached".to_string());
}
result.tainted_vars = tainted.clone();
// Decide, for each sink, whether it is reached by taint.
for sink in &mut result.sinks {
if let Some(&sink_block) = line_to_block.get(&sink.line) {
if let (Some(tainted_ssa), Some(ssa_ref)) = (ssa_tainted_per_block.as_ref(), ssa) {
if ssa_sink_is_tainted(ssa_ref, sink, sink_block, tainted_ssa) {
sink.tainted = true;
}
// Fallback for sink variables that SSA has no name for: the sink is
// tainted when any unsanitized tainted variable of the block appears as
// an identifier in the block's statement text.
if !sink.tainted {
let sink_var_is_ssa_tracked = ssa_ref
.ssa_names
.iter()
.any(|n| n.variable == sink.var);
if !sink_var_is_ssa_tracked
&& !result.sanitized_vars.contains(&sink.var)
{
if let Some(tainted_at_block) = tainted.get(&sink_block) {
if let Some(block) =
cfg.blocks.iter().find(|b| b.id == sink_block)
{
let block_text: String = (block.lines.0..=block.lines.1)
.filter_map(|l| statements.get(&l))
.map(|s| s.as_str())
.collect::<Vec<_>>()
.join(" ");
for tvar in tainted_at_block {
if result.sanitized_vars.contains(tvar) {
continue;
}
if identifier_in_text(&block_text, tvar) {
sink.tainted = true;
break;
}
}
}
}
}
}
} else if let Some(tainted_at_block) = tainted.get(&sink_block) {
// Name-based path: direct membership first, then the same block-text
// identifier search (here without a sanitized-variable filter).
if tainted_at_block.contains(&sink.var) {
sink.tainted = true;
} else if !tainted_at_block.is_empty() {
if let Some(block) = cfg.blocks.iter().find(|b| b.id == sink_block) {
let block_text: String = (block.lines.0..=block.lines.1)
.filter_map(|l| statements.get(&l))
.map(|s| s.as_str())
.collect::<Vec<_>>()
.join(" ");
for tvar in tainted_at_block {
if identifier_in_text(&block_text, tvar) {
sink.tainted = true;
break;
}
}
}
}
}
}
}
// Snapshots let the loop below push into `result.flows` while reading
// sources and sinks.
let sources_clone = result.sources.clone();
let sinks_snapshot: Vec<(String, u32, TaintSinkType, bool, Option<String>)> = result
.sinks
.iter()
.map(|s| {
(
s.var.clone(),
s.line,
s.sink_type,
s.tainted,
s.statement.clone(),
)
})
.collect();
// Pair every tainted sink with every source and keep the connected pairs.
for (sink_var, sink_line, sink_type, sink_tainted, sink_statement) in sinks_snapshot {
if !sink_tainted {
continue;
}
if let Some(&sink_block) = line_to_block.get(&sink_line) {
for source in &sources_clone {
if let Some(&source_block) = line_to_block.get(&source.line) {
// Direct: the sink variable itself is tainted in the sink block.
let direct =
flows_to(&source.var, &sink_var, &tainted, &predecessors, sink_block);
// Indirect: the source variable is tainted in the sink block, is not
// sanitized, and is named in the sink's statement text.
let indirect = if direct {
false
} else if !result.sanitized_vars.contains(&source.var)
&& tainted
.get(&sink_block)
.map(|t| t.contains(&source.var))
.unwrap_or(false)
{
match &sink_statement {
Some(stmt) => identifier_in_text(stmt, &source.var),
None => false,
}
} else {
false
};
if direct || indirect {
let is_sanitized = result.sanitized_vars.contains(&sink_var)
|| result.sanitized_vars.contains(&source.var);
// A source located after the sink line cannot have caused it.
let causally_ordered = source.line <= sink_line;
if !is_sanitized && causally_ordered {
let path = compute_flow_path(source_block, sink_block, &successors);
let flow = TaintFlow {
source: source.clone(),
sink: TaintSink {
var: sink_var.clone(),
line: sink_line,
sink_type,
tainted: true,
statement: sink_statement.clone(),
},
path,
};
result.flows.push(flow);
}
}
}
}
}
}
Ok(result)
}
/// Reports whether `target_var` is in the tainted set recorded for
/// `target_block`.
///
/// `_source_var` and `_predecessors` are accepted but not consulted; a block
/// with no recorded set yields `false`.
fn flows_to(
    _source_var: &str,
    target_var: &str,
    tainted_vars: &HashMap<usize, HashSet<String>>,
    _predecessors: &HashMap<usize, Vec<usize>>,
    target_block: usize,
) -> bool {
    match tainted_vars.get(&target_block) {
        Some(block_taint) => block_taint.contains(target_var),
        None => false,
    }
}
/// Returns a block path from `source_block` to `sink_block` found by
/// breadth-first search over `successors`, i.e. one with the fewest edges.
///
/// * When both blocks are the same, the path is that single block.
/// * When the sink is not reachable from the source, the two-element fallback
///   `[source_block, sink_block]` is returned.
fn compute_flow_path(
    source_block: usize,
    sink_block: usize,
    successors: &HashMap<usize, Vec<usize>>,
) -> Vec<usize> {
    if source_block == sink_block {
        return vec![source_block];
    }
    let mut visited: HashSet<usize> = HashSet::from([source_block]);
    let mut queue: VecDeque<Vec<usize>> = VecDeque::from([vec![source_block]]);
    while let Some(path) = queue.pop_front() {
        // Queued paths always hold at least the source block.
        let Some(&tail) = path.last() else {
            continue;
        };
        let Some(succs) = successors.get(&tail) else {
            continue;
        };
        for &next in succs {
            if next == sink_block {
                let mut found = path.clone();
                found.push(next);
                return found;
            }
            // `insert` returns true only the first time a block is seen.
            if visited.insert(next) {
                let mut longer = path.clone();
                longer.push(next);
                queue.push_back(longer);
            }
        }
    }
    vec![source_block, sink_block]
}
/// Convenience entry point that runs the taint analysis without a
/// caller-supplied syntax tree or SSA form.
///
/// A source text is rebuilt from `statements` by placing each statement on its
/// own 1-based line (missing lines stay empty), parsed for `language`, and
/// handed to `compute_taint_with_tree`. When parsing fails, a default
/// `TaintInfo` is returned.
pub fn compute_taint(
    cfg: &CfgInfo,
    refs: &[VarRef],
    statements: &HashMap<u32, String>,
    language: Language,
) -> Result<TaintInfo, TldrError> {
    let line_count = statements.keys().copied().max().unwrap_or(0) as usize;
    let mut lines = vec![String::new(); line_count];
    for (&line, stmt) in statements {
        // Line 0 has no slot; every other key is within `line_count`.
        let slot = (line as usize)
            .checked_sub(1)
            .and_then(|idx| lines.get_mut(idx));
        if let Some(slot) = slot {
            slot.clone_from(stmt);
        }
    }
    let src = lines.join("\n");

    let Ok(tree) = crate::ast::parser::parse(&src, language) else {
        return Ok(TaintInfo::default());
    };
    compute_taint_with_tree(
        cfg,
        refs,
        statements,
        Some(&tree),
        Some(src.as_bytes()),
        language,
        None,
    )
}
/// Applies the variable references of one block to the incoming taint set and
/// returns the resulting set.
///
/// References are processed in the order stored in `refs_by_block`:
/// * `Definition` — if `sanitizer_ast_index` has an entry for the line, the
///   variable is recorded in `sanitized_vars` and untainted; otherwise it
///   becomes tainted when a tainted variable is used on the same line or when
///   the variable is a source on that line, and is untainted in every other
///   case.
/// * `Update` — the variable becomes tainted when a tainted variable is used
///   on the same line; existing taint is never removed.
/// * `Use` — no effect.
///
/// A block without an entry in `refs_by_block` returns `current_taint`
/// unchanged.
fn process_block(
    block_id: usize,
    current_taint: HashSet<String>,
    refs_by_block: &HashMap<usize, Vec<&VarRef>>,
    sources_by_line: &HashMap<u32, HashSet<String>>,
    sanitized_vars: &mut HashSet<String>,
    sanitizer_ast_index: &HashMap<u32, SanitizerType>,
) -> HashSet<String> {
    // Working copy of the taint set, updated reference by reference.
    let mut taint = current_taint;
    let block_refs: &[&VarRef] = refs_by_block
        .get(&block_id)
        .map(Vec::as_slice)
        .unwrap_or(&[]);
    for var_ref in block_refs {
        match var_ref.ref_type {
            RefType::Definition => {
                let rhs_tainted = rhs_uses_tainted(var_ref.line, &taint, block_refs);
                let is_source_def = sources_by_line
                    .get(&var_ref.line)
                    .is_some_and(|vars| vars.contains(&var_ref.name));
                let ast_sanitizer_hit = sanitizer_ast_index.contains_key(&var_ref.line);
                if ast_sanitizer_hit {
                    // A sanitizer on the defining line wins over any taint.
                    sanitized_vars.insert(var_ref.name.clone());
                    taint.remove(&var_ref.name);
                } else if rhs_tainted || is_source_def {
                    taint.insert(var_ref.name.clone());
                } else {
                    // Redefinition from untainted data clears earlier taint.
                    taint.remove(&var_ref.name);
                }
            }
            RefType::Use => {}
            RefType::Update => {
                if rhs_uses_tainted(var_ref.line, &taint, block_refs) {
                    taint.insert(var_ref.name.clone());
                }
            }
        }
    }
    taint
}
/// Reports whether any `Use` reference on `line` within `block_refs` names a
/// variable contained in `current_taint`.
fn rhs_uses_tainted(
    line: u32,
    current_taint: &HashSet<String>,
    block_refs: &[&VarRef],
) -> bool {
    for candidate in block_refs {
        let is_use_on_line =
            candidate.line == line && matches!(candidate.ref_type, RefType::Use);
        if is_use_on_line && current_taint.contains(&candidate.name) {
            return true;
        }
    }
    false
}
/// Read-only inputs of `ssa_propagate`, bundled into one borrowed context.
struct SsaPropagateCtx<'a> {
// SSA form of the function: blocks with instructions and phi functions, plus
// the table of SSA names.
ssa: &'a SsaFunction,
// Detected taint sources used to seed the propagation.
sources: &'a [TaintSource],
// CFG edges, keyed by block id.
predecessors: &'a HashMap<usize, Vec<usize>>,
successors: &'a HashMap<usize, Vec<usize>>,
// Maps a source line to the id of the block that contains it.
line_to_block: &'a HashMap<u32, usize>,
// Upper bound on the number of worklist iterations.
max_iterations: usize,
// Lines on which a sanitizer was detected.
sanitizer_ast_index: &'a HashMap<u32, SanitizerType>,
}
/// Propagates taint over the SSA form with a worklist fixed-point iteration and
/// returns, per block id, the set of tainted keys at the end of that block.
///
/// Keys are `TaintKey::Versioned` for SSA names and `TaintKey::Raw` for source
/// variables for which no defining instruction was found on the source line.
/// Variables defined on a line listed in `sanitizer_ast_index` are added to
/// `sanitized_vars`.
///
/// The loop stops silently once `max_iterations` blocks have been processed;
/// the caller is not told whether the limit was hit.
fn ssa_propagate(
ctx: &SsaPropagateCtx<'_>,
sanitized_vars: &mut HashSet<String>,
) -> HashMap<usize, HashSet<TaintKey>> {
let SsaPropagateCtx {
ssa,
sources,
predecessors,
successors,
line_to_block,
max_iterations,
sanitizer_ast_index,
} = *ctx;
// Per-block instruction list, sorted by line.
let mut block_insts: HashMap<usize, Vec<&crate::ssa::types::SsaInstruction>> = HashMap::new();
for sblock in &ssa.blocks {
let mut insts: Vec<&crate::ssa::types::SsaInstruction> = sblock.instructions.iter().collect();
insts.sort_by_key(|i| i.line);
block_insts.insert(sblock.id, insts);
}
let mut block_phis: HashMap<usize, &Vec<crate::ssa::types::PhiFunction>> = HashMap::new();
for sblock in &ssa.blocks {
block_phis.insert(sblock.id, &sblock.phi_functions);
}
// Seed: for every source, taint the SSA target(s) defined on the source line
// for the source variable; fall back to a raw name key when none is found.
let mut tainted: HashMap<usize, HashSet<TaintKey>> = HashMap::new();
for source in sources {
if let Some(&block_id) = line_to_block.get(&source.line) {
let block = tainted.entry(block_id).or_default();
let mut seeded_versioned = false;
if let Some(insts) = block_insts.get(&block_id) {
for inst in insts {
if inst.line != source.line {
continue;
}
if let Some(target) = inst.target {
if let Some(name) = ssa.ssa_names.get(target.0 as usize) {
if name.variable == source.var {
block.insert(TaintKey::Versioned(target));
seeded_versioned = true;
}
}
}
}
}
if !seeded_versioned {
block.insert(TaintKey::Raw(source.var.clone()));
}
}
}
// Every SSA block starts on the worklist.
let block_ids: Vec<usize> = ssa.blocks.iter().map(|b| b.id).collect();
let mut worklist: VecDeque<usize> = block_ids.iter().cloned().collect();
let mut iterations: usize = 0;
while let Some(block_id) = worklist.pop_front() {
if iterations >= max_iterations {
break;
}
iterations += 1;
// Incoming taint = union of the predecessors' current sets.
let mut taint_in: HashSet<TaintKey> = HashSet::new();
if let Some(preds) = predecessors.get(&block_id) {
for p in preds {
if let Some(t) = tainted.get(p) {
for k in t {
taint_in.insert(k.clone());
}
}
}
}
// A phi target is tainted when any of its incoming names is tainted.
if let Some(phis) = block_phis.get(&block_id) {
for phi in phis.iter() {
let any_tainted = phi
.sources
.iter()
.any(|s| taint_in.contains(&TaintKey::Versioned(s.name)));
if any_tainted {
taint_in.insert(TaintKey::Versioned(phi.target));
}
}
}
// Re-apply the seeds of sources located in this block. Unlike the initial
// seeding, nothing is added here when the block has no instruction list.
for source in sources {
if line_to_block.get(&source.line) == Some(&block_id) {
if let Some(insts) = block_insts.get(&block_id) {
let mut found = false;
for inst in insts {
if inst.line != source.line {
continue;
}
if let Some(target) = inst.target {
if let Some(name) = ssa.ssa_names.get(target.0 as usize) {
if name.variable == source.var {
taint_in.insert(TaintKey::Versioned(target));
found = true;
}
}
}
}
if !found {
taint_in.insert(TaintKey::Raw(source.var.clone()));
}
}
}
}
// Transfer function: walk the block's instructions in line order.
let mut current_taint = taint_in.clone();
if let Some(insts) = block_insts.get(&block_id) {
for inst in insts {
// A use is tainted through its SSA name or through a raw key for the
// same variable.
let uses_tainted = inst.uses.iter().any(|use_id| {
if current_taint.contains(&TaintKey::Versioned(*use_id)) {
return true;
}
if let Some(name) = ssa.ssa_names.get(use_id.0 as usize) {
if current_taint.contains(&TaintKey::Raw(name.variable.clone())) {
return true;
}
}
false
});
if let Some(target) = inst.target {
let target_var = ssa
.ssa_names
.get(target.0 as usize)
.map(|n| n.variable.clone());
let ast_sanitizer_hit = sanitizer_ast_index.contains_key(&inst.line);
if ast_sanitizer_hit {
// Sanitized definition: record the variable; the target is not added
// to the taint set.
if let Some(v) = target_var.clone() {
sanitized_vars.insert(v);
}
} else if uses_tainted {
current_taint.insert(TaintKey::Versioned(target));
} else if let Some(ref v) = target_var {
// Definition from untainted uses drops any raw key for the variable.
current_taint.remove(&TaintKey::Raw(v.clone()));
}
}
}
}
// Re-queue successors only when this block's outgoing set changed.
let old_taint = tainted.get(&block_id).cloned().unwrap_or_default();
if current_taint != old_taint {
tainted.insert(block_id, current_taint);
if let Some(succs) = successors.get(&block_id) {
for &s in succs {
if !worklist.contains(&s) {
worklist.push_back(s);
}
}
}
}
}
tainted
}
/// Reports whether `sink` is reached by taint according to the per-block SSA
/// taint sets in `tainted_ssa`.
///
/// Returns `false` when `sink_block` has no entry in `tainted_ssa` or when no
/// SSA block with that id exists. Otherwise the sink counts as tainted if
/// either of the following holds:
///
/// * an instruction on the sink's line uses an SSA name whose variable equals
///   `sink.var`, and the block's taint set holds that name's versioned key or
///   the raw key for the variable;
/// * the block's taint set holds the raw (unversioned) key for `sink.var`,
///   whether or not any instruction on the sink line uses it.
fn ssa_sink_is_tainted(
    ssa: &SsaFunction,
    sink: &TaintSink,
    sink_block: usize,
    tainted_ssa: &HashMap<usize, HashSet<TaintKey>>,
) -> bool {
    // Both the taint set and the SSA block are required; bail out if either is absent.
    let (block_taint, sblock) = match (
        tainted_ssa.get(&sink_block),
        ssa.blocks.iter().find(|b| b.id == sink_block),
    ) {
        (Some(taint), Some(block)) => (taint, block),
        _ => return false,
    };

    // Look only at uses on the sink's own line that resolve to the sink variable.
    let tainted_use_on_sink_line = sblock
        .instructions
        .iter()
        .filter(|inst| inst.line == sink.line)
        .flat_map(|inst| inst.uses.iter())
        .any(|use_id| {
            ssa.ssa_names
                .get(use_id.0 as usize)
                .map_or(false, |name| {
                    name.variable == sink.var
                        && (block_taint.contains(&TaintKey::Versioned(*use_id))
                            || block_taint.contains(&TaintKey::Raw(name.variable.clone())))
                })
        });

    // Fallback: the variable may be tracked only under its raw name.
    tainted_use_on_sink_line || block_taint.contains(&TaintKey::Raw(sink.var.clone()))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_taint_source_type_serde() {
    // The enum is declared with `rename_all = "snake_case"`, so the variant
    // must appear as a snake_case JSON string.
    let expected = TaintSourceType::UserInput;
    let encoded = serde_json::to_string(&expected).unwrap();
    assert_eq!(encoded, "\"user_input\"");

    // Decoding the emitted JSON must give back the same variant.
    let decoded = serde_json::from_str::<TaintSourceType>(&encoded).unwrap();
    assert_eq!(decoded, expected);
}
#[test]
fn test_taint_sink_type_serde() {
    // The enum is declared with `rename_all = "snake_case"`, so the variant
    // must appear as a snake_case JSON string.
    let expected = TaintSinkType::SqlQuery;
    let encoded = serde_json::to_string(&expected).unwrap();
    assert_eq!(encoded, "\"sql_query\"");

    // Decoding the emitted JSON must give back the same variant.
    let decoded = serde_json::from_str::<TaintSinkType>(&encoded).unwrap();
    assert_eq!(decoded, expected);
}
#[test]
fn test_sanitizer_type_serde() {
    // The enum is declared with `rename_all = "snake_case"`, so the variant
    // must appear as a snake_case JSON string.
    let expected = SanitizerType::Numeric;
    let encoded = serde_json::to_string(&expected).unwrap();
    assert_eq!(encoded, "\"numeric\"");

    // Decoding the emitted JSON must give back the same variant.
    let decoded = serde_json::from_str::<SanitizerType>(&encoded).unwrap();
    assert_eq!(decoded, expected);
}
#[test]
fn test_taint_info_new() {
    // `new` must record the function name and leave every collection empty.
    let fresh = TaintInfo::new("my_function");

    assert_eq!(fresh.function_name, "my_function");
    assert!(fresh.tainted_vars.is_empty());
    assert!(fresh.sources.is_empty());
    assert!(fresh.sinks.is_empty());
    assert!(fresh.flows.is_empty());
    assert!(fresh.sanitized_vars.is_empty());
}
#[test]
fn test_taint_info_default() {
    // The default value has no function name and no recorded taint.
    let blank = TaintInfo::default();

    assert!(blank.function_name.is_empty());
    assert!(blank.tainted_vars.is_empty());
}
#[test]
fn test_taint_info_is_tainted() {
    // Register a single tainted variable for block 0.
    let mut info = TaintInfo::new("test");
    let block_zero = HashSet::from(["user_input".to_string()]);
    info.tainted_vars.insert(0, block_zero);

    // Hit: the recorded variable in the recorded block.
    assert!(info.is_tainted(0, "user_input"));
    // Miss: same block, different variable.
    assert!(!info.is_tainted(0, "other_var"));
    // Miss: same variable, block with no recorded taint.
    assert!(!info.is_tainted(1, "user_input"));
}
#[test]
fn test_taint_info_get_vulnerabilities() {
    // One sink flagged as tainted and one flagged as clean.
    let exploitable = TaintSink {
        var: "query".to_string(),
        line: 5,
        sink_type: TaintSinkType::SqlQuery,
        tainted: true,
        statement: Some("cursor.execute(query)".to_string()),
    };
    let harmless = TaintSink {
        var: "safe_query".to_string(),
        line: 10,
        sink_type: TaintSinkType::SqlQuery,
        tainted: false,
        statement: Some("cursor.execute(safe_query)".to_string()),
    };

    let mut info = TaintInfo::new("test");
    info.sinks.push(exploitable);
    info.sinks.push(harmless);

    // Only the tainted sink should be reported.
    let reported = info.get_vulnerabilities();
    assert_eq!(reported.len(), 1);
    assert_eq!(reported[0].var, "query");
}
#[test]
fn test_taint_terminates_on_large_cfg_with_backedges() {
    use crate::types::{BlockType, CfgBlock, CfgEdge, CfgInfo, EdgeType, RefType, VarRef};

    let num_blocks = 50;

    // Block `i` spans the ten lines `10 * i + 1 ..= 10 * i + 10`.
    let blocks: Vec<CfgBlock> = (0..num_blocks)
        .map(|i| CfgBlock {
            id: i,
            block_type: BlockType::Body,
            lines: ((i * 10 + 1) as u32, (i * 10 + 10) as u32),
            calls: Vec::new(),
        })
        .collect();

    // A straight chain 0 -> 1 -> ... -> 49, followed by a back edge from every
    // fifth block (5, 10, ..., 45) to the block three positions earlier.
    let forward_edges = (0..num_blocks - 1).map(|i| CfgEdge {
        from: i,
        to: i + 1,
        edge_type: EdgeType::Unconditional,
        condition: None,
    });
    let back_edges = (5..num_blocks).step_by(5).map(|i| CfgEdge {
        from: i,
        to: i - 3,
        edge_type: EdgeType::BackEdge,
        condition: None,
    });
    let edges: Vec<CfgEdge> = forward_edges.chain(back_edges).collect();

    let cfg = CfgInfo {
        function: "large_func".to_string(),
        blocks,
        edges,
        entry_block: 0,
        exit_blocks: vec![num_blocks - 1],
        cyclomatic_complexity: 10,
        nested_functions: HashMap::new(),
    };

    // One definition on the first line of each block: `var_0` reads from
    // `input()`, every later `var_i` copies `var_{i-1}`.
    let mut refs = Vec::new();
    let mut statements = HashMap::new();
    for i in 0..num_blocks {
        let line = (i * 10 + 1) as u32;
        let text = if i == 0 {
            "var_0 = input()".to_string()
        } else {
            format!("var_{} = var_{}", i, i - 1)
        };
        statements.insert(line, text);
        refs.push(VarRef {
            name: format!("var_{}", i),
            ref_type: RefType::Definition,
            line,
            column: 0,
            context: None,
            group_id: None,
        });
    }

    let started = std::time::Instant::now();
    let outcome = compute_taint(&cfg, &refs, &statements, Language::Python);
    let took = started.elapsed();

    assert!(outcome.is_ok(), "compute_taint should succeed");
    assert!(
        took.as_secs() < 5,
        "compute_taint took too long: {:?} (possible infinite loop)",
        took
    );
    let info = outcome.unwrap();
    assert!(!info.sources.is_empty(), "Should detect input() source");
}
#[test]
fn test_taint_iteration_cap_prevents_runaway() {
    use crate::types::{BlockType, CfgBlock, CfgEdge, CfgInfo, EdgeType, RefType, VarRef};

    // Two body blocks that form a cycle: 0 -> 1 and a back edge 1 -> 0.
    let body_block = |id, lines| CfgBlock {
        id,
        block_type: BlockType::Body,
        lines,
        calls: Vec::new(),
    };
    let edge = |from, to, edge_type| CfgEdge {
        from,
        to,
        edge_type,
        condition: None,
    };
    let cfg = CfgInfo {
        function: "runaway".to_string(),
        blocks: vec![body_block(0, (1, 100)), body_block(1, (101, 200))],
        edges: vec![
            edge(0, 1, EdgeType::Unconditional),
            edge(1, 0, EdgeType::BackEdge),
        ],
        entry_block: 0,
        exit_blocks: vec![1],
        cyclomatic_complexity: 2,
        nested_functions: HashMap::new(),
    };

    // 500 definitions `v0 .. v499`, one per line starting at line 1, each
    // assigned from `input()`.
    let refs: Vec<VarRef> = (0..500u32)
        .map(|i| VarRef {
            name: format!("v{}", i),
            ref_type: RefType::Definition,
            line: i + 1,
            column: 0,
            context: None,
            group_id: None,
        })
        .collect();
    let statements: HashMap<u32, String> = (0..500u32)
        .map(|i| (i + 1, format!("v{} = input()", i)))
        .collect();

    let started = std::time::Instant::now();
    let outcome = compute_taint(&cfg, &refs, &statements, Language::Python);
    let took = started.elapsed();

    assert!(outcome.is_ok());
    assert!(
        took.as_secs() < 5,
        "Should terminate quickly with iteration cap, took {:?}",
        took
    );
}
#[test]
fn test_sources_are_deduplicated() {
    use crate::ast::ParserPool;
    use crate::types::{BlockType, CfgBlock, CfgEdge, CfgInfo, EdgeType, RefType, VarRef};

    // Fixture layout (1-based lines), kept in step with the CFG and refs below:
    //   1: import os
    //   2: (blank)
    //   3: def vulnerable_func(user_input):
    //   4:     data = input("Enter: ")
    //   5:     query = "SELECT ..." + data
    //   6:     os.system(user_input)
    //   7:     eval(data)
    // The body is indented so the snippet is syntactically valid Python.
    let python_code = r#"import os

def vulnerable_func(user_input):
    data = input("Enter: ")
    query = "SELECT * FROM users WHERE id = " + data
    os.system(user_input)
    eval(data)
"#;
    let cfg = CfgInfo {
        function: "vulnerable_func".to_string(),
        blocks: vec![
            // Entry block: the `def` line.
            CfgBlock {
                id: 0,
                block_type: BlockType::Entry,
                lines: (3, 3),
                calls: Vec::new(),
            },
            // Body block: the four statements of the function body.
            CfgBlock {
                id: 1,
                block_type: BlockType::Body,
                lines: (4, 7),
                calls: vec![
                    "input".to_string(),
                    "os.system".to_string(),
                    "eval".to_string(),
                ],
            },
        ],
        edges: vec![CfgEdge {
            from: 0,
            to: 1,
            edge_type: EdgeType::Unconditional,
            condition: None,
        }],
        entry_block: 0,
        exit_blocks: vec![1],
        cyclomatic_complexity: 1,
        nested_functions: HashMap::new(),
    };
    let refs = vec![
        VarRef {
            name: "user_input".to_string(),
            ref_type: RefType::Definition,
            line: 3,
            column: 0,
            context: None,
            group_id: None,
        },
        VarRef {
            name: "data".to_string(),
            ref_type: RefType::Definition,
            line: 4,
            column: 0,
            context: None,
            group_id: None,
        },
        VarRef {
            name: "query".to_string(),
            ref_type: RefType::Definition,
            line: 5,
            column: 0,
            context: None,
            group_id: None,
        },
    ];

    // Statement map keyed by 1-based line number, taken straight from the fixture.
    let mut statements: HashMap<u32, String> = HashMap::new();
    for (i, line) in python_code.lines().enumerate() {
        statements.insert((i + 1) as u32, line.to_string());
    }

    // A parse failure is tolerated here: the analysis is then called without a tree.
    let pool = ParserPool::new();
    let tree = pool.parse(python_code, Language::Python).ok();
    let result = compute_taint_with_tree(
        &cfg,
        &refs,
        &statements,
        tree.as_ref(),
        Some(python_code.as_bytes()),
        Language::Python,
        None,
    )
    .unwrap();

    // No two sources may share the same (line, source type, variable) triple.
    let mut seen = std::collections::HashSet::new();
    for source in &result.sources {
        let key = (
            source.line,
            std::mem::discriminant(&source.source_type),
            source.var.clone(),
        );
        assert!(
            seen.insert(key),
            "Duplicate source found: line={}, var={}, type={:?}",
            source.line,
            source.var,
            source.source_type
        );
    }

    // Likewise, no two sinks may share the same (line, sink type, variable) triple.
    let mut seen_sinks = std::collections::HashSet::new();
    for sink in &result.sinks {
        let key = (
            sink.line,
            std::mem::discriminant(&sink.sink_type),
            sink.var.clone(),
        );
        assert!(
            seen_sinks.insert(key),
            "Duplicate sink found: line={}, var={}, type={:?}",
            sink.line,
            sink.var,
            sink.sink_type
        );
    }
}
#[test]
fn test_sinks_detected_via_merge() {
    use crate::ast::ParserPool;
    use crate::types::{BlockType, CfgBlock, CfgEdge, CfgInfo, EdgeType, RefType, VarRef};

    // Fixture layout (1-based lines), kept in step with the CFG and refs below:
    //   1: import os
    //   2: (blank)
    //   3: def vuln(user_input):
    //   4:     os.system(user_input)
    //   5:     eval(user_input)
    // The body is indented so the snippet is syntactically valid Python.
    let python_code = r#"import os

def vuln(user_input):
    os.system(user_input)
    eval(user_input)
"#;
    let cfg = CfgInfo {
        function: "vuln".to_string(),
        blocks: vec![
            // Entry block: the `def` line.
            CfgBlock {
                id: 0,
                block_type: BlockType::Entry,
                lines: (3, 3),
                calls: Vec::new(),
            },
            // Body block: the two sink calls.
            CfgBlock {
                id: 1,
                block_type: BlockType::Body,
                lines: (4, 5),
                calls: vec!["os.system".to_string(), "eval".to_string()],
            },
        ],
        edges: vec![CfgEdge {
            from: 0,
            to: 1,
            edge_type: EdgeType::Unconditional,
            condition: None,
        }],
        entry_block: 0,
        exit_blocks: vec![1],
        cyclomatic_complexity: 1,
        nested_functions: HashMap::new(),
    };
    let refs = vec![VarRef {
        name: "user_input".to_string(),
        ref_type: RefType::Definition,
        line: 3,
        column: 0,
        context: None,
        group_id: None,
    }];

    // Statement map keyed by 1-based line number, taken straight from the fixture.
    let mut statements: HashMap<u32, String> = HashMap::new();
    for (i, line) in python_code.lines().enumerate() {
        statements.insert((i + 1) as u32, line.to_string());
    }

    // A parse failure is tolerated here: the analysis is then called without a tree.
    let pool = ParserPool::new();
    let tree = pool.parse(python_code, Language::Python).ok();
    let result = compute_taint_with_tree(
        &cfg,
        &refs,
        &statements,
        tree.as_ref(),
        Some(python_code.as_bytes()),
        Language::Python,
        None,
    )
    .unwrap();

    // Both calls in the body must be reported, each with its own sink type.
    let sink_types: Vec<_> = result.sinks.iter().map(|s| s.sink_type).collect();
    assert!(
        sink_types.contains(&TaintSinkType::ShellExec),
        "Should detect os.system as ShellExec sink, got: {:?}",
        sink_types
    );
    assert!(
        sink_types.contains(&TaintSinkType::CodeEval),
        "Should detect eval as CodeEval sink, got: {:?}",
        sink_types
    );
}
}