use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use walkdir::WalkDir;
use crate::ast::extract::{extract_classes_detailed, extract_functions_detailed};
use crate::ast::parser::parse;
use crate::cfg::extractor::extract_cfg_from_tree;
use crate::dfg::extractor::extract_dfg_from_tree_with_cfg;
use crate::error::TldrError;
use crate::security::taint::{
compute_taint_with_tree, TaintSink as CanonicalTaintSink, TaintSinkType,
TaintSource as CanonicalTaintSource, TaintSourceType,
};
use crate::types::Language;
use crate::TldrResult;
/// Category under which a taint finding is reported.
///
/// Serialized by serde using `snake_case` variant names (see the
/// `rename_all` attribute below). The `Display` impl in this file supplies
/// the human-readable name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VulnType {
/// Tainted data reaches a SQL query sink.
SqlInjection,
/// Tainted data reaches an HTML output sink.
Xss,
/// Tainted data reaches a shell-execution, eval, exec, or compile sink.
CommandInjection,
/// Tainted data reaches a file-open or file-write sink.
PathTraversal,
/// Tainted data reaches an outbound HTTP request sink.
Ssrf,
/// Tainted data reaches a deserialization sink.
Deserialization,
/// Tainted data reaches a redirect-target sink.
OpenRedirect,
}
/// Human-readable vulnerability name (e.g. `"SQL Injection"`).
impl std::fmt::Display for VulnType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            VulnType::SqlInjection => "SQL Injection",
            VulnType::Xss => "Cross-Site Scripting (XSS)",
            VulnType::CommandInjection => "Command Injection",
            VulnType::PathTraversal => "Path Traversal",
            VulnType::Ssrf => "Server-Side Request Forgery",
            VulnType::Deserialization => "Unsafe Deserialization",
            VulnType::OpenRedirect => "Open Redirect",
        };
        f.write_str(label)
    }
}
/// Report-facing description of the point where tainted data originates.
///
/// Can be built from the taint engine's canonical source through the `From`
/// impl in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintSource {
/// Variable name reported by the taint engine (`var` on the canonical source).
pub variable: String,
/// Label for the kind of source. The `From` conversion stores the `Debug`
/// name of the canonical source type; `scan_file_vulns` replaces it with a
/// human-readable description when it builds a finding.
pub source_type: String,
/// Line number copied from the canonical source.
pub line: u32,
/// Trimmed text of the source statement; empty when none was recorded.
pub expression: String,
}
/// Report-facing description of the point where tainted data is consumed.
///
/// Can be built from the taint engine's canonical sink through the `From`
/// impl in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintSink {
/// Populated from the canonical sink's `var` field by the `From` conversion.
// NOTE(review): despite the field name, this may hold a variable rather than
// a function name; confirm against the taint engine.
pub function: String,
/// `Debug` name of the canonical sink type (for example `"CodeEval"`).
pub sink_type: String,
/// Line number copied from the canonical sink.
pub line: u32,
/// Trimmed text of the sink statement; empty when none was recorded.
pub expression: String,
}
/// One reported source-to-sink taint flow in a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnFinding {
/// Vulnerability category derived from the sink type.
pub vuln_type: VulnType,
/// File in which the flow was found.
pub file: PathBuf,
/// Where the tainted data originates.
pub source: TaintSource,
/// Where the tainted data is consumed.
pub sink: TaintSink,
/// Textual trace of the flow. `scan_file_vulns` emits either one
/// `block-<id>` entry per element of the engine's path or, when that path is
/// empty, a two-entry source/sink summary.
pub flow_path: Vec<String>,
/// Severity label; `severity_for` in this file currently returns `"HIGH"`
/// for every category.
pub severity: String,
/// Suggested fix for this vulnerability category.
pub remediation: String,
/// CWE identifier such as `"CWE-89"`; `scan_file_vulns` always sets it.
pub cwe_id: Option<String>,
}
/// Aggregate counts over all findings in a report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnSummary {
/// Total number of findings.
pub total_findings: usize,
/// Finding count per vulnerability category, keyed by the category's
/// `Display` name (for example `"SQL Injection"`).
pub by_type: HashMap<String, usize>,
/// Number of distinct files that have at least one finding.
pub affected_files: usize,
}
/// Result of `scan_vulnerabilities`: every finding plus summary statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnReport {
/// All findings across the scanned files.
pub findings: Vec<VulnFinding>,
/// Number of files selected for scanning. Files whose scan returned an error
/// are still counted; they contribute no findings.
pub files_scanned: usize,
/// Aggregate counts derived from `findings`.
pub summary: VulnSummary,
}
impl From<CanonicalTaintSource> for TaintSource {
fn from(canonical: CanonicalTaintSource) -> Self {
Self {
variable: canonical.var,
source_type: format!("{:?}", canonical.source_type),
line: canonical.line,
expression: canonical
.statement
.map(|s| s.trim().to_string())
.unwrap_or_default(),
}
}
}
/// Converts the taint engine's sink record into the report-facing form.
///
/// `function` is taken from the canonical sink's `var` field, `sink_type` is
/// the `Debug` name of the canonical sink type, and `expression` is the
/// trimmed sink statement (empty when the engine recorded none).
impl From<CanonicalTaintSink> for TaintSink {
    fn from(canonical: CanonicalTaintSink) -> Self {
        Self {
            // `canonical` is owned, so the name can be moved out rather than
            // cloned, as the source conversion already does.
            function: canonical.var,
            sink_type: format!("{:?}", canonical.sink_type),
            line: canonical.line,
            expression: canonical
                .statement
                .map(|s| s.trim().to_string())
                .unwrap_or_default(),
        }
    }
}
/// Maps a taint-engine sink type to the vulnerability category it is
/// reported under.
///
/// Several sink types can share a category: every code-execution flavour is
/// reported as command injection, and both file sinks as path traversal.
fn vuln_type_from_sink(sink_type: TaintSinkType) -> VulnType {
    let category = match sink_type {
        // Code and shell execution sinks.
        TaintSinkType::ShellExec
        | TaintSinkType::CodeEval
        | TaintSinkType::CodeExec
        | TaintSinkType::CodeCompile => VulnType::CommandInjection,
        // File system sinks.
        TaintSinkType::FileOpen | TaintSinkType::FileWrite => VulnType::PathTraversal,
        // One-to-one mappings.
        TaintSinkType::SqlQuery => VulnType::SqlInjection,
        TaintSinkType::HtmlOutput => VulnType::Xss,
        TaintSinkType::HttpRequest => VulnType::Ssrf,
        TaintSinkType::Deserialize => VulnType::Deserialization,
        TaintSinkType::OpenRedirect => VulnType::OpenRedirect,
    };
    category
}
/// Severity label attached to a finding of the given category.
///
/// Every category is currently rated the same, so the argument is unused.
fn severity_for(_vuln_type: VulnType) -> &'static str {
    const UNIFORM_SEVERITY: &str = "HIGH";
    UNIFORM_SEVERITY
}
/// Rank used to choose between findings that collapse to the same dedupe
/// key: the finding whose sink type has the larger rank is kept.
fn sink_type_precedence(sink_type: TaintSinkType) -> u32 {
    let rank: u32 = match sink_type {
        // Listed from lowest to highest rank.
        TaintSinkType::OpenRedirect => 35,
        TaintSinkType::HttpRequest => 40,
        TaintSinkType::FileWrite => 50,
        TaintSinkType::FileOpen => 60,
        TaintSinkType::HtmlOutput => 70,
        TaintSinkType::Deserialize => 80,
        TaintSinkType::CodeCompile => 85,
        TaintSinkType::CodeExec => 90,
        TaintSinkType::CodeEval => 95,
        TaintSinkType::ShellExec => 100,
        TaintSinkType::SqlQuery => 110,
    };
    rank
}
/// Human-readable description of a taint source, specialised per language
/// where a framework- or API-specific wording exists.
///
/// The environment-variable branch lists every language explicitly (no
/// wildcard arm), so adding a language forces a decision here at compile time.
fn descriptions_for(source_type: TaintSourceType, language: Language) -> &'static str {
    match source_type {
        TaintSourceType::HttpParam => match language {
            Language::Python => "Flask GET/POST parameter",
            Language::JavaScript | Language::TypeScript => "Express query/route parameter",
            Language::Go => "HTTP query parameter",
            Language::Java => "Servlet parameter",
            Language::Ruby => "Rails parameter",
            Language::Kotlin => "Ktor query parameter",
            Language::Scala => "Play query parameter",
            Language::CSharp => "ASP.NET request parameter",
            Language::Php => "PHP $_GET / $_POST / $_REQUEST",
            Language::Elixir => "Phoenix conn.params",
            Language::Lua | Language::Luau => "OpenResty/ngx request args",
            _ => "HTTP request parameter",
        },
        TaintSourceType::HttpBody => match language {
            Language::Python => "Flask JSON/raw request body",
            Language::JavaScript | Language::TypeScript => "Express request body",
            _ => "HTTP request body",
        },
        TaintSourceType::UserInput => match language {
            Language::Python => "User input from stdin",
            Language::Java => "User input (Scanner / readLine)",
            Language::Kotlin => "User input (readLine)",
            Language::Scala => "User input (StdIn / readLine)",
            Language::CSharp => "User input (Console.ReadLine)",
            Language::Swift => "User input (CommandLine.arguments / readLine)",
            Language::Lua | Language::Luau => "User input (io.read)",
            _ => "User input from stdin",
        },
        TaintSourceType::Stdin => match language {
            Language::C | Language::Cpp => "Standard input (scanf / fgets / cin)",
            _ => "Standard input",
        },
        TaintSourceType::EnvVar => match language {
            Language::Python => "Environment variable (os.environ / os.getenv)",
            Language::JavaScript | Language::TypeScript => "Environment variable (process.env)",
            Language::Go => "Environment variable (os.Getenv)",
            Language::Rust => "Environment variable (std::env::var)",
            Language::Java => "Environment variable (System.getenv)",
            Language::Kotlin => "Environment variable (System.getenv)",
            Language::Scala => "Environment variable (sys.env)",
            Language::CSharp => "Environment variable (Environment.GetEnvironmentVariable)",
            Language::Php => "Environment variable ($_ENV / getenv)",
            Language::Ruby => "Environment variable (ENV)",
            Language::C | Language::Cpp => "Environment variable (getenv)",
            Language::Lua | Language::Luau => "Environment variable (os.getenv)",
            Language::Swift => "Environment variable (ProcessInfo.environment)",
            Language::Elixir => "Environment variable (System.get_env)",
            Language::Ocaml => "Environment variable (Sys.getenv)",
        },
        TaintSourceType::FileRead => "Untrusted file read",
    }
}
/// Short remediation advice shown alongside a finding of the given category.
fn get_remediation(vuln_type: VulnType) -> &'static str {
    let advice = match vuln_type {
        VulnType::SqlInjection => {
            "Use parameterized queries or prepared statements instead of string concatenation"
        }
        VulnType::Xss => {
            "Sanitize output using context-appropriate encoding (HTML, JavaScript, URL, etc.)"
        }
        VulnType::CommandInjection => {
            "Use subprocess with shell=False and pass arguments as a list, or use shlex.quote()"
        }
        VulnType::PathTraversal => {
            "Validate paths against a whitelist or use realpath() and verify the result is within allowed directories"
        }
        VulnType::Ssrf => "Validate URLs against an allowlist of domains and protocols",
        VulnType::Deserialization => {
            "Avoid deserializing untrusted data, or use safer formats like JSON"
        }
        VulnType::OpenRedirect => {
            "Validate redirect targets against an allowlist of trusted URLs/origins; do not concatenate user input into the redirect target"
        }
    };
    advice
}
/// CWE identifier reported for the given vulnerability category.
fn get_cwe_id(vuln_type: VulnType) -> &'static str {
    let cwe = match vuln_type {
        // Ordered by CWE number.
        VulnType::PathTraversal => "CWE-22",
        VulnType::CommandInjection => "CWE-78",
        VulnType::Xss => "CWE-79",
        VulnType::SqlInjection => "CWE-89",
        VulnType::Deserialization => "CWE-502",
        VulnType::OpenRedirect => "CWE-601",
        VulnType::Ssrf => "CWE-918",
    };
    cwe
}
/// Scans a file or directory tree for taint-flow vulnerabilities.
///
/// # Arguments
/// * `path` - A single file, or a directory that is walked recursively.
/// * `language` - When `path` is a directory, restricts the walk to files
///   detected as this language; `None` accepts every file whose language can
///   be detected. The filter is not applied when `path` is a single file.
/// * `vuln_type` - When set, only findings of this category are reported.
///
/// # Returns
/// A [`VulnReport`] with all findings, the number of files selected for
/// scanning, and per-category / per-file summary counts.
///
/// # Errors
/// This function itself never returns `Err`: a file whose scan fails is
/// treated as having no findings (best effort), and unreadable directory
/// entries are skipped.
pub fn scan_vulnerabilities(
    path: &Path,
    language: Option<Language>,
    vuln_type: Option<VulnType>,
) -> TldrResult<VulnReport> {
    use rayon::prelude::*;

    let files: Vec<PathBuf> = if path.is_file() {
        vec![path.to_path_buf()]
    } else {
        let mut discovered: Vec<PathBuf> = WalkDir::new(path)
            .into_iter()
            .filter_map(|entry| entry.ok())
            .filter(|entry| entry.file_type().is_file())
            .filter(|entry| match (Language::from_path(entry.path()), language) {
                (Some(detected), Some(wanted)) => detected == wanted,
                (Some(_), None) => true,
                (None, _) => false,
            })
            .map(|entry| entry.path().to_path_buf())
            .collect();
        // Directory iteration order depends on the file system; sorting keeps
        // the report order stable across runs and machines.
        discovered.sort();
        discovered
    };

    // Files are scanned in parallel; collecting into a `Vec` keeps the
    // per-file results in the same order as `files`.
    let per_file: Vec<Vec<VulnFinding>> = files
        .par_iter()
        .map(|file_path| scan_file_vulns(file_path, vuln_type).unwrap_or_default())
        .collect();
    let findings: Vec<VulnFinding> = per_file.into_iter().flatten().collect();

    let mut by_type: HashMap<String, usize> = HashMap::new();
    // Borrow the paths instead of cloning one `PathBuf` per finding.
    let mut affected: HashSet<&Path> = HashSet::new();
    for finding in &findings {
        *by_type.entry(finding.vuln_type.to_string()).or_insert(0) += 1;
        affected.insert(finding.file.as_path());
    }
    let affected_files = affected.len();

    let summary = VulnSummary {
        total_findings: findings.len(),
        by_type,
        affected_files,
    };
    Ok(VulnReport {
        files_scanned: files.len(),
        findings,
        summary,
    })
}
/// Heuristic: does this statement look like a parameterized SQL call?
///
/// True only when the line contains both a placeholder (`?`, `%s`, or a
/// `:name` parameter) and what looks like a separate argument collection
/// (`, (`, `, [` or `, {`).
fn is_parameterized_sql(line: &str) -> bool {
    const ARG_COLLECTION_OPENERS: [&str; 3] = [", (", ", [", ", {"];

    // Without a separate argument collection the placeholder means nothing.
    let passes_args = ARG_COLLECTION_OPENERS
        .iter()
        .any(|opener| line.contains(opener));
    if !passes_args {
        return false;
    }
    line.contains('?') || line.contains("%s") || has_named_param(line)
}
/// Heuristic: does `line` contain a `:name`-style SQL named parameter?
///
/// A match is a `:` immediately followed by an ASCII letter, located either
/// at the start of the line or directly after a space, `=`, a quote, `(` or
/// `,`. The `(` and `,` contexts cover placeholders such as `VALUES (:name)`
/// and `IN (1,:x)`. A colon preceded by any other character (for example the
/// second colon of `a::text`, or `key:value`) does not count.
fn has_named_param(line: &str) -> bool {
    let bytes = line.as_bytes();
    bytes.iter().enumerate().any(|(idx, &byte)| {
        if byte != b':' {
            return false;
        }
        // The character after the colon must start an identifier.
        let starts_name = bytes
            .get(idx + 1)
            .map_or(false, |next| next.is_ascii_alphabetic());
        if !starts_name {
            return false;
        }
        match idx.checked_sub(1).map(|prev_idx| bytes[prev_idx]) {
            // Colon is the very first character of the line.
            None => true,
            Some(prev) => matches!(prev, b' ' | b'=' | b'\'' | b'"' | b'(' | b','),
        }
    })
}
/// Heuristic: is this Python `subprocess` statement free of shell
/// interpretation?
///
/// Rules, in order:
/// 1. `shell=True` anywhere on the line means unsafe.
/// 2. `shell=False` anywhere on the line means safe.
/// 3. Otherwise the call is safe when `subprocess.run(`, `subprocess.call(`
///    or `subprocess.Popen(` is immediately followed (ignoring whitespace) by
///    a list literal.
///
/// The `shell` keyword is matched with whitespace ignored, so `shell = True`
/// is recognised the same as `shell=True`.
fn is_safe_subprocess_call(line: &str) -> bool {
    const LIST_ARG_CALLS: [&str; 3] = ["subprocess.run(", "subprocess.call(", "subprocess.Popen("];

    // Whitespace around `=` is legal Python. Compare against a
    // whitespace-free copy so `shell = True` cannot slip past the check.
    let compact: String = line.chars().filter(|c| !c.is_whitespace()).collect();
    if compact.contains("shell=True") {
        return false;
    }
    if compact.contains("shell=False") {
        return true;
    }

    LIST_ARG_CALLS.iter().any(|prefix| {
        line.find(prefix).map_or(false, |pos| {
            line[pos + prefix.len()..].trim_start().starts_with('[')
        })
    })
}
/// Byte offsets at which each line of `content` begins.
///
/// The first entry is always `0`. `content.len()` is appended as an end
/// sentinel unless it is already the last entry (which happens when the text
/// ends with `\n` or is empty).
fn line_start_offsets(content: &str) -> Vec<usize> {
    let mut offsets = vec![0usize];
    offsets.extend(
        content
            .bytes()
            .enumerate()
            .filter(|&(_, byte)| byte == b'\n')
            .map(|(idx, _)| idx + 1),
    );
    if offsets.last() != Some(&content.len()) {
        offsets.push(content.len());
    }
    offsets
}

/// Runs taint analysis over every function and method in one file.
///
/// Steps:
/// 1. Read and parse the file, then collect top-level functions and class
///    methods (de-duplicated by name and line).
/// 2. Per function, in parallel: skip it when its approximate body text has
///    no taint pattern; otherwise build the CFG and DFG and compute taint
///    flows.
/// 3. Drop flows that are filtered out by `vuln_filter`, look like
///    parameterized SQL, look like a safe subprocess call, or whose source
///    and sink are the same variable, line and statement.
/// 4. Collapse findings that share (sink line, source line, source variable,
///    category), keeping the one whose sink type has the highest
///    `sink_type_precedence`.
///
/// # Returns
/// The surviving findings, sorted by sink line, source line, source variable
/// and category so that the output order is deterministic.
///
/// # Errors
/// Returns an error when the file cannot be read or its language cannot be
/// detected from the path. A parse failure, or a CFG / DFG / taint failure
/// for an individual function, yields no findings rather than an error.
fn scan_file_vulns(path: &Path, vuln_filter: Option<VulnType>) -> TldrResult<Vec<VulnFinding>> {
    use rayon::prelude::*;
    use std::collections::hash_map::Entry;

    let content = std::fs::read_to_string(path)?;
    let language = Language::from_path(path).ok_or_else(|| {
        TldrError::UnsupportedLanguage(
            path.extension()
                .and_then(|e| e.to_str())
                .unwrap_or("unknown")
                .to_string(),
        )
    })?;
    let tree = match parse(&content, language) {
        Ok(t) => t,
        Err(_) => return Ok(Vec::new()),
    };

    let mut fn_infos: Vec<crate::types::FunctionInfo> =
        extract_functions_detailed(&tree, &content, language);
    for class_info in extract_classes_detailed(&tree, &content, language) {
        fn_infos.extend(class_info.methods);
    }
    fn_infos.sort_by(|a, b| a.line_number.cmp(&b.line_number).then(a.name.cmp(&b.name)));
    fn_infos.dedup_by(|a, b| a.name == b.name && a.line_number == b.line_number);
    if fn_infos.is_empty() {
        return Ok(Vec::new());
    }

    let path_buf = path.to_path_buf();
    let source_bytes = content.as_bytes();

    // Split the file into lines once; every function reuses this view.
    let source_lines: Vec<&str> = content.lines().collect();
    let total_lines = source_lines.len() as u32;

    // Approximate each function body as "from its first line up to the line
    // before the next function" (or the end of the file for the last one).
    let fn_body_ranges: Vec<(u32, u32)> = fn_infos
        .iter()
        .enumerate()
        .map(|(i, fi)| {
            let start = fi.line_number.max(1);
            let end = match fn_infos.get(i + 1) {
                Some(next) => next.line_number.saturating_sub(1).max(start),
                None => total_lines.max(start),
            };
            (start, end)
        })
        .collect();

    let line_offsets = line_start_offsets(&content);
    let body_slice = |start_line: u32, end_line: u32| -> &str {
        let last = line_offsets.len() - 1;
        let start_idx = (start_line.saturating_sub(1) as usize).min(last);
        let end_idx = (end_line as usize).min(last);
        &content[line_offsets[start_idx]..line_offsets[end_idx]]
    };

    let per_fn_findings: Vec<Vec<(VulnFinding, TaintSinkType)>> = fn_infos
        .par_iter()
        .enumerate()
        .map(|(idx, fn_info)| {
            // Cheap textual prefilter before the CFG/DFG work.
            let (start_line, end_line) = fn_body_ranges[idx];
            let body_text = body_slice(start_line, end_line);
            if !crate::security::taint::function_body_has_taint_pattern(body_text, language) {
                return Vec::new();
            }

            let cfg = match extract_cfg_from_tree(&tree, &content, &fn_info.name, language) {
                Ok(c) if !c.blocks.is_empty() => c,
                _ => return Vec::new(),
            };
            let dfg = match extract_dfg_from_tree_with_cfg(
                &tree,
                &content,
                &fn_info.name,
                language,
                &cfg,
            ) {
                Ok(d) => d,
                Err(_) => return Vec::new(),
            };
            let ssa: Option<&crate::ssa::types::SsaFunction> = None;

            // Line span covered by the CFG blocks; these are the statements
            // handed to the taint engine.
            let fn_start = cfg.blocks.iter().map(|b| b.lines.0).min().unwrap_or(1);
            let fn_end = cfg
                .blocks
                .iter()
                .map(|b| b.lines.1)
                .max()
                .unwrap_or(total_lines);
            let statements: HashMap<u32, String> = source_lines
                .iter()
                .enumerate()
                .take(fn_end as usize)
                .skip(fn_start.saturating_sub(1) as usize)
                .map(|(i, line)| ((i + 1) as u32, (*line).to_string()))
                .collect();

            let info = match compute_taint_with_tree(
                &cfg,
                &dfg.refs,
                &statements,
                Some(&tree),
                Some(source_bytes),
                language,
                ssa,
            ) {
                Ok(i) => i,
                Err(_) => return Vec::new(),
            };

            let mut local: Vec<(VulnFinding, TaintSinkType)> = Vec::new();
            for flow in info.flows {
                let canonical_sink_type = flow.sink.sink_type;
                let vuln_type = vuln_type_from_sink(canonical_sink_type);
                if let Some(filter) = vuln_filter {
                    if vuln_type != filter {
                        continue;
                    }
                }

                // Heuristic false-positive suppression on the sink statement.
                let stmt_text = flow.sink.statement.as_deref().unwrap_or("");
                if vuln_type == VulnType::SqlInjection && is_parameterized_sql(stmt_text) {
                    continue;
                }
                if vuln_type == VulnType::CommandInjection && is_safe_subprocess_call(stmt_text) {
                    continue;
                }

                // A flow whose source and sink are the same variable on the
                // same statement is not reported.
                let source_stmt = flow.source.statement.as_deref().unwrap_or("");
                if flow.source.line == flow.sink.line
                    && flow.source.var == flow.sink.var
                    && source_stmt == stmt_text
                {
                    continue;
                }

                // Reports show a human-readable source description instead of
                // the canonical type's `Debug` name.
                let mut source_record: TaintSource = flow.source.clone().into();
                source_record.source_type =
                    descriptions_for(flow.source.source_type, language).to_string();
                let sink_record: TaintSink = flow.sink.clone().into();

                let flow_path: Vec<String> = if flow.path.is_empty() {
                    vec![
                        format!(
                            "{}:{} - taint source",
                            source_record.line, source_record.variable
                        ),
                        format!("{}:{} - sink", sink_record.line, sink_record.function),
                    ]
                } else {
                    flow.path
                        .iter()
                        .map(|bid| format!("block-{}", bid))
                        .collect()
                };

                local.push((
                    VulnFinding {
                        vuln_type,
                        file: path_buf.clone(),
                        source: source_record,
                        sink: sink_record,
                        flow_path,
                        severity: severity_for(vuln_type).to_string(),
                        remediation: get_remediation(vuln_type).to_string(),
                        cwe_id: Some(get_cwe_id(vuln_type).to_string()),
                    },
                    canonical_sink_type,
                ));
            }
            local
        })
        .collect();

    // All findings here belong to the same file, so the file is not part of
    // the dedupe key.
    let mut best: HashMap<(u32, u32, String, VulnType), (VulnFinding, TaintSinkType)> =
        HashMap::new();
    for (finding, sink_type) in per_fn_findings.into_iter().flatten() {
        let key = (
            finding.sink.line,
            finding.source.line,
            finding.source.variable.clone(),
            finding.vuln_type,
        );
        match best.entry(key) {
            Entry::Vacant(slot) => {
                slot.insert((finding, sink_type));
            }
            Entry::Occupied(mut slot) => {
                if sink_type_precedence(sink_type) > sink_type_precedence(slot.get().1) {
                    slot.insert((finding, sink_type));
                }
            }
        }
    }

    // `HashMap` iteration order is arbitrary; sort on the dedupe key fields
    // so repeated scans of the same file return findings in the same order.
    let mut findings: Vec<VulnFinding> = best.into_values().map(|(f, _)| f).collect();
    findings.sort_by(|a, b| {
        a.sink
            .line
            .cmp(&b.sink.line)
            .then_with(|| a.source.line.cmp(&b.source.line))
            .then_with(|| a.source.variable.cmp(&b.source.variable))
            .then_with(|| (a.vuln_type as u8).cmp(&(b.vuln_type as u8)))
    });
    Ok(findings)
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
#[test]
fn test_vuln_type_display() {
assert_eq!(VulnType::SqlInjection.to_string(), "SQL Injection");
assert_eq!(VulnType::Xss.to_string(), "Cross-Site Scripting (XSS)");
}
#[test]
fn test_cwe_ids() {
assert_eq!(get_cwe_id(VulnType::SqlInjection), "CWE-89");
assert_eq!(get_cwe_id(VulnType::Xss), "CWE-79");
assert_eq!(get_cwe_id(VulnType::CommandInjection), "CWE-78");
}
#[test]
fn test_vuln_type_from_sink_exhaustive() {
    // One row per sink type handled by `vuln_type_from_sink`, including
    // `OpenRedirect`, so the test matches its "exhaustive" name.
    let cases = [
        (TaintSinkType::SqlQuery, VulnType::SqlInjection),
        (TaintSinkType::ShellExec, VulnType::CommandInjection),
        (TaintSinkType::CodeEval, VulnType::CommandInjection),
        (TaintSinkType::CodeExec, VulnType::CommandInjection),
        (TaintSinkType::CodeCompile, VulnType::CommandInjection),
        (TaintSinkType::HtmlOutput, VulnType::Xss),
        (TaintSinkType::FileOpen, VulnType::PathTraversal),
        (TaintSinkType::FileWrite, VulnType::PathTraversal),
        (TaintSinkType::HttpRequest, VulnType::Ssrf),
        (TaintSinkType::Deserialize, VulnType::Deserialization),
        (TaintSinkType::OpenRedirect, VulnType::OpenRedirect),
    ];
    for &(sink, expected) in &cases {
        assert_eq!(
            vuln_type_from_sink(sink),
            expected,
            "unexpected vulnerability category for sink type {:?}",
            sink
        );
    }
}
#[test]
fn test_go_vuln_e2e() {
    let go_code = r#"package main
import (
"database/sql"
"net/http"
"os/exec"
)
func handler(w http.ResponseWriter, r *http.Request) {
id := r.URL.Query().Get("id")
db, _ := sql.Open("mysql", "dsn")
db.Query("SELECT * FROM users WHERE id = " + id)
cmd := r.URL.Query().Get("cmd")
out, _ := exec.Command(cmd).Output()
}
"#;
    // The process id in the file name keeps concurrent runs of the test
    // binary from sharing one fixture in the system temp dir. The `.go`
    // extension is kept because the language is detected from the path.
    let tmp = std::env::temp_dir().join(format!("test_go_vuln_e2e_{}.go", std::process::id()));
    std::fs::write(&tmp, go_code).unwrap();
    let scan = scan_vulnerabilities(&tmp, None, None);
    // Remove the fixture before unwrapping or asserting, so that a failing
    // test does not leave the file behind.
    std::fs::remove_file(&tmp).ok();
    let result = scan.unwrap();
    eprintln!("Go findings: {}", result.findings.len());
    for f in &result.findings {
        eprintln!(
            " {:?} line {}: {} -> {}",
            f.vuln_type, f.sink.line, f.source.variable, f.sink.function
        );
    }
    assert!(
        !result.findings.is_empty(),
        "Expected Go SQL injection finding, got {}",
        result.findings.len()
    );
}
#[test]
fn test_e2e_parameterized_query_no_findings() {
let dir = tempfile::tempdir().unwrap();
let file = dir.path().join("safe_sql.py");
std::fs::write(
&file,
r#"
from flask import request
import sqlite3
def handler():
user_id = request.args.get("id")
cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,))
"#,
)
.unwrap();
let findings = scan_file_vulns(&file, None).unwrap();
assert!(
findings.is_empty(),
"Parameterized query must produce 0 findings, got {}",
findings.len()
);
}
#[test]
fn test_e2e_subprocess_list_no_findings() {
let dir = tempfile::tempdir().unwrap();
let file = dir.path().join("safe_cmd.py");
std::fs::write(
&file,
r#"
from flask import request
def handler():
filename = request.args.get("file")
subprocess.run(["cat", filename])
"#,
)
.unwrap();
let findings = scan_file_vulns(&file, None).unwrap();
assert!(
findings.is_empty(),
"subprocess.run with list args must produce 0 findings, got {}",
findings.len()
);
}
#[test]
fn test_e2e_type_coercion_no_findings() {
let dir = tempfile::tempdir().unwrap();
let file = dir.path().join("safe_int.py");
std::fs::write(
&file,
r#"
from flask import request
def handler():
user_id = int(request.args.get("id"))
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
"#,
)
.unwrap();
let findings = scan_file_vulns(&file, None).unwrap();
assert!(
findings.is_empty(),
"int() type coercion must break taint, producing 0 findings, got {}",
findings.len()
);
}
#[test]
fn test_e2e_real_sqli_still_detected() {
let dir = tempfile::tempdir().unwrap();
let file = dir.path().join("vuln_sql.py");
std::fs::write(
&file,
r#"
from flask import request
def handler():
name = request.args.get("name")
cursor.execute(f"SELECT * FROM users WHERE name = '{name}'")
"#,
)
.unwrap();
let findings = scan_file_vulns(&file, None).unwrap();
assert!(
!findings.is_empty(),
"Real SQL injection must still be detected"
);
}
#[test]
fn test_e2e_real_command_injection_still_detected() {
let dir = tempfile::tempdir().unwrap();
let file = dir.path().join("vuln_cmd.py");
std::fs::write(
&file,
r#"
from flask import request
def handler():
filename = request.args.get("file")
os.system("cat " + filename)
"#,
)
.unwrap();
let findings = scan_file_vulns(&file, None).unwrap();
assert!(
!findings.is_empty(),
"Real command injection must still be detected"
);
}
#[test]
fn test_taint_finding_dedupe_eval_compile_collapses_to_one() {
let py = r#"
def from_pyfile(filename, d):
config_file = open(filename, "rb").read()
eval(compile(config_file, filename, "exec"), d.__dict__)
return True
"#;
let dir = tempfile::tempdir().unwrap();
let file = dir.path().join("dedupe_repro.py");
std::fs::write(&file, py).unwrap();
let findings = scan_file_vulns(&file, None).unwrap();
let cmd_findings: Vec<_> = findings
.iter()
.filter(|f| f.vuln_type == VulnType::CommandInjection)
.collect();
assert_eq!(
cmd_findings.len(),
1,
"Expected exactly 1 CommandInjection finding post-dedupe, got {}: {:?}",
cmd_findings.len(),
cmd_findings
.iter()
.map(|f| f.sink.sink_type.clone())
.collect::<Vec<_>>()
);
assert_eq!(
cmd_findings[0].sink.sink_type, "CodeEval",
"Dedupe must keep CodeEval (highest sink_type_precedence) over \
CodeExec/CodeCompile, got {}",
cmd_findings[0].sink.sink_type
);
}
#[test]
fn test_taint_finding_dedupe_distinct_source_vars_kept() {
let py = r#"
import os
from flask import request
def handler():
env_var = os.environ.get("HOME")
name = request.args.get("name")
os.system("echo " + env_var + " " + name)
"#;
let dir = tempfile::tempdir().unwrap();
let file = dir.path().join("distinct_vars.py");
std::fs::write(&file, py).unwrap();
let findings = scan_file_vulns(&file, None).unwrap();
let cmd_findings: Vec<_> = findings
.iter()
.filter(|f| f.vuln_type == VulnType::CommandInjection)
.collect();
assert_eq!(
cmd_findings.len(),
2,
"Expected 2 distinct CommandInjection findings (one per source \
variable), got {}",
cmd_findings.len()
);
let mut src_vars: Vec<&str> = cmd_findings
.iter()
.map(|f| f.source.variable.as_str())
.collect();
src_vars.sort();
assert_eq!(src_vars, vec!["env_var", "name"]);
}
#[test]
fn test_taint_flow_causal_ordering_open_then_read_no_inversion() {
let py = r#"
def from_pyfile(filename):
with open(filename, mode="rb") as config_file:
exec(compile(config_file.read(), filename, "exec"), d.__dict__)
return True
"#;
let dir = tempfile::tempdir().unwrap();
let file = dir.path().join("inversion_repro.py");
std::fs::write(&file, py).unwrap();
let findings = scan_file_vulns(&file, None).unwrap();
for f in &findings {
assert!(
f.source.line <= f.sink.line,
"Causal ordering violated: source.line={} > sink.line={} \
(vuln_type={:?}, file={:?})",
f.source.line,
f.sink.line,
f.vuln_type,
f.file
);
}
}
fn assert_detects_vuln(
filename: &str,
content: &str,
vuln_type: VulnType,
) -> TldrResult<Vec<VulnFinding>> {
let temp = TempDir::new().unwrap();
let path = temp.path().join(filename);
fs::write(&path, content).unwrap();
scan_file_vulns(&path, Some(vuln_type))
}
#[test]
fn test_e2e_rust_command_injection() {
let findings = assert_detects_vuln(
"main.rs",
"fn main() {\n let cmd = std::env::args().nth(1).unwrap();\n std::process::Command::new(cmd);\n}\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_ruby_command_injection() {
let findings = assert_detects_vuln(
"app.rb",
"def handler\n cmd = params[:cmd]\n system(cmd)\nend\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_c_command_injection() {
let findings = assert_detects_vuln(
"main.c",
"int main(int argc, char **argv) {\n char *cmd = argv[1];\n system(cmd);\n return 0;\n}\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_cpp_command_injection() {
let findings = assert_detects_vuln(
"main.cpp",
"int main(int argc, char **argv) {\n char *cmd = argv[1];\n system(cmd);\n return 0;\n}\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_php_command_injection() {
let findings = assert_detects_vuln(
"index.php",
"<?php\nfunction handler() {\n $cmd = $_GET['cmd'];\n system($cmd);\n}\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_kotlin_command_injection() {
let findings = assert_detects_vuln(
"Main.kt",
"fun handler(call: ApplicationCall) {\n val cmd = call.request.queryParameters[\"cmd\"] ?: \"\"\n Runtime.getRuntime().exec(cmd)\n}\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_swift_command_injection() {
let findings = assert_detects_vuln(
"main.swift",
"func handler() {\n let cmd = CommandLine.arguments[1]\n system(cmd)\n}\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_csharp_command_injection() {
let findings = assert_detects_vuln(
"Program.cs",
"public class C { public void H(HttpRequest Request) { var cmd = Request.Query[\"cmd\"]; Process.Start(cmd); } }\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_scala_command_injection() {
let findings = assert_detects_vuln(
"Main.scala",
"object M { def handler(request: Request): Unit = { val cmd = request.getQueryString(\"cmd\").get; Runtime.getRuntime.exec(cmd) } }\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_elixir_command_injection() {
let findings = assert_detects_vuln(
"app.ex",
"defmodule App do\n def handler(conn) do\n cmd = conn.params[\"cmd\"]\n System.cmd(\"sh\", [cmd])\n end\nend\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_lua_command_injection() {
let findings = assert_detects_vuln(
"app.lua",
"function handler()\n local cmd = ngx.req.get_uri_args()['cmd']\n os.execute(cmd)\nend\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_luau_command_injection() {
let findings = assert_detects_vuln(
"app.luau",
"local function handler()\n local cmd = os.getenv(\"CMD\")\n os.execute(cmd)\nend\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_ocaml_command_injection() {
let findings = assert_detects_vuln(
"main.ml",
"let handler () =\n let cmd = Sys.getenv \"CMD\" in\n Sys.command cmd\n",
VulnType::CommandInjection,
)
.unwrap();
assert!(!findings.is_empty());
}
#[test]
fn test_e2e_python_ssrf_requests_get() {
let findings = assert_detects_vuln(
"vuln.py",
"def h():\n target = request.args.get(\"url\")\n requests.get(target)\n",
VulnType::Ssrf,
)
.unwrap();
assert!(
!findings.is_empty(),
"VAL-007: Python `requests.get(target)` with tainted target must produce >= 1 SSRF finding."
);
assert!(findings.iter().all(|f| f.vuln_type == VulnType::Ssrf));
}
#[test]
fn test_e2e_python_ssrf_urllib_urlopen() {
let findings = assert_detects_vuln(
"vuln.py",
"def h():\n target = request.args.get(\"url\")\n urllib.request.urlopen(target)\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: Python `urllib.request.urlopen(target)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_python_ssrf_httpx_get() {
let findings = assert_detects_vuln(
"vuln.py",
"def h():\n target = request.args.get(\"url\")\n httpx.get(target)\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: Python `httpx.get(target)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_typescript_ssrf_fetch() {
let findings = assert_detects_vuln(
"vuln.ts",
"async function h(req: Request) {\n const target = req.query.url;\n await fetch(target);\n}\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: TypeScript `fetch(target)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_typescript_ssrf_axios_get() {
let findings = assert_detects_vuln(
"vuln.ts",
"async function h(req: any) {\n const target = req.query.url;\n await axios.get(target);\n}\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: TypeScript `axios.get(target)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_javascript_ssrf_fetch() {
let findings = assert_detects_vuln(
"vuln.js",
"function h(req) {\n const target = req.query.url;\n fetch(target);\n}\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: JavaScript `fetch(target)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_go_ssrf_http_get() {
let findings = assert_detects_vuln(
"vuln.go",
"package main\nimport \"net/http\"\nfunc h(r *http.Request) {\n target := r.URL.Query().Get(\"url\")\n http.Get(target)\n}\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: Go `http.Get(target)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_go_ssrf_http_post() {
let findings = assert_detects_vuln(
"vuln.go",
"package main\nimport \"net/http\"\nfunc h(r *http.Request, body []byte) {\n target := r.URL.Query().Get(\"url\")\n http.Post(target, \"application/json\", nil)\n}\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: Go `http.Post(target, ...)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_go_ssrf_http_newrequest() {
let findings = assert_detects_vuln(
"vuln.go",
"package main\nimport \"net/http\"\nfunc h(r *http.Request) {\n target := r.URL.Query().Get(\"url\")\n http.NewRequest(\"GET\", target, nil)\n}\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: Go `http.NewRequest(method, target, body)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_java_ssrf_url_openconnection() {
let findings = assert_detects_vuln(
"Vuln.java",
"public class V { public void h(HttpServletRequest request) throws Exception { String target = request.getParameter(\"url\"); new URL(target).openConnection(); } }\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: Java `new URL(target).openConnection()` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_rust_ssrf_reqwest_get() {
let findings = assert_detects_vuln(
"main.rs",
"fn handler() {\n let target = std::env::var(\"URL\").unwrap();\n reqwest::get(target);\n}\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: Rust `reqwest::get(target)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_ruby_ssrf_net_http_get() {
let findings = assert_detects_vuln(
"app.rb",
"def handler\n target = params[:url]\n Net::HTTP.get(target)\nend\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: Ruby `Net::HTTP.get(target)` with tainted target must produce >= 1 SSRF finding.");
}
#[test]
fn test_e2e_php_ssrf_file_get_contents() {
let findings = assert_detects_vuln(
"index.php",
"<?php\nfunction handler() {\n $target = $_GET['url'];\n file_get_contents($target);\n}\n",
VulnType::Ssrf,
)
.unwrap();
assert!(!findings.is_empty(),
"VAL-007: PHP `file_get_contents(target)` with tainted target must produce >= 1 SSRF finding.");
}
/// Writes a Go fixture (query parameter passed to `http.Get`) into a temp
/// directory, scans it with `scan_file_vulns(&path, None)`, and requires at
/// least one finding whose `vuln_type` is `VulnType::Ssrf`.
///
/// NOTE(review): `None` presumably selects the default vuln-type list, as the
/// assertion message implies — confirm against `scan_file_vulns`.
#[test]
fn test_e2e_ssrf_in_default_vuln_types() {
    let go_source = "package main\nimport \"net/http\"\nfunc h(r *http.Request) { target := r.URL.Query().Get(\"u\"); http.Get(target) }\n";

    let workspace = tempfile::tempdir().unwrap();
    let fixture_path = workspace.path().join("vuln.go");
    std::fs::write(&fixture_path, go_source).unwrap();

    let findings = scan_file_vulns(&fixture_path, None).unwrap();

    // Only the presence of an SSRF finding matters, not how many there are.
    let has_ssrf = findings
        .iter()
        .any(|finding| finding.vuln_type == VulnType::Ssrf);
    assert!(
        has_ssrf,
        "VAL-007: SSRF must be included in the default vuln_types list. Got findings: {:?}",
        findings.iter().map(|f| f.vuln_type).collect::<Vec<_>>()
    );
}
/// A pure-arithmetic Python function must (1) yield zero findings when the
/// file is scanned and (2) be rejected by
/// `function_body_has_taint_pattern`, i.e. the prefilter reports no
/// source/sink pattern in it.
#[test]
fn test_fastpath_skip_function_with_no_taint_patterns() {
    // The same text is written to disk for the scan and fed to the prefilter.
    let body = "def add(a, b):\n total = a + b\n return total * 2\n";

    let workspace = tempfile::tempdir().unwrap();
    let script_path = workspace.path().join("arith.py");
    std::fs::write(&script_path, body).unwrap();

    let findings = scan_file_vulns(&script_path, None).unwrap();
    let scan_is_clean = findings.is_empty();
    assert!(
        scan_is_clean,
        "FAST-PATH-1: pure-arithmetic function must produce 0 findings; got: {:?}",
        findings.iter().map(|f| f.vuln_type).collect::<Vec<_>>()
    );

    let prefilter_hit =
        crate::security::taint::function_body_has_taint_pattern(body, Language::Python);
    assert!(
        !prefilter_hit,
        "FAST-PATH-1: prefilter must report no source/sink pattern in pure-arithmetic body."
    );
}
/// The prefilter must admit a Python body that contains only a source
/// pattern (`request.args`) and one that contains only a sink pattern
/// (`.execute`). A body with both must also produce at least one
/// `VulnType::SqlInjection` finding through `assert_detects_vuln`.
#[test]
fn test_fastpath_no_skip_function_with_source_or_sink() {
    // Source pattern only.
    let body_source_only = "def h():\n target = request.args.get(\"q\")\n return target.upper()\n";
    let source_admitted = crate::security::taint::function_body_has_taint_pattern(
        body_source_only,
        Language::Python,
    );
    assert!(
        source_admitted,
        "FAST-PATH-2: prefilter must admit a body containing the source pattern `request.args`."
    );

    // Sink pattern only.
    let body_sink_only = "def h(q):\n cursor.execute(\"SELECT 1\")\n";
    let sink_admitted = crate::security::taint::function_body_has_taint_pattern(
        body_sink_only,
        Language::Python,
    );
    assert!(
        sink_admitted,
        "FAST-PATH-2: prefilter must admit a body containing the sink pattern `.execute`."
    );

    // Source and sink together in one function.
    let body_source_and_sink = "def h():\n q = request.args.get(\"q\")\n cursor.execute(q)\n";
    let outcome = assert_detects_vuln("vuln.py", body_source_and_sink, VulnType::SqlInjection);
    let findings = outcome.unwrap();
    let detected = !findings.is_empty();
    assert!(
        detected,
        "FAST-PATH-2: source + sink in same function must yield >= 1 SqlInjection finding (proves full analysis ran)."
    );
}
/// A Python body that mentions `request.args` only inside a string literal
/// must still be admitted by `function_body_has_taint_pattern`, while
/// scanning the same text as a file must produce zero findings.
#[test]
fn test_fastpath_runs_full_analysis_on_string_literal_match() {
    let body = "def doc():\n msg = \"see request.args in flask docs\"\n return msg\n";

    let prefilter_hit =
        crate::security::taint::function_body_has_taint_pattern(body, Language::Python);
    assert!(
        prefilter_hit,
        "FAST-PATH-3: prefilter must admit a body where the source substring appears inside a string literal (correctness — superset of AST detector)."
    );

    let workspace = tempfile::tempdir().unwrap();
    let script_path = workspace.path().join("doc.py");
    std::fs::write(&script_path, body).unwrap();

    let findings = scan_file_vulns(&script_path, None).unwrap();
    let scan_is_clean = findings.is_empty();
    assert!(
        scan_is_clean,
        "FAST-PATH-3: string-literal-only match must produce 0 findings via AST suppression (not via prefilter skip); got: {:?}",
        findings.iter().map(|f| (f.vuln_type, f.sink.line)).collect::<Vec<_>>()
    );
}
/// The Python needle set returned by `fastpath_pattern_strings` must be
/// non-empty and must contain each of the canonical needles listed below.
#[test]
fn test_fastpath_needle_set_python_canonical() {
    let needles = crate::security::taint::fastpath_pattern_strings(Language::Python);
    assert!(!needles.is_empty(), "Python needle set must not be empty.");

    let expected_needles = [
        ".execute",
        ".read",
        "eval",
        "exec",
        "request.args",
        "os.system",
        "os.environ",
    ];
    for canonical in &expected_needles {
        let present = needles.contains(canonical);
        assert!(
            present,
            "Python needle set missing canonical needle `{}`. Got: {:?}",
            canonical,
            needles
        );
    }
}
/// For every language listed below, `fastpath_pattern_strings` must return a
/// non-empty needle set.
#[test]
fn test_fastpath_needle_set_nonempty_all_langs() {
    let every_language = [
        Language::Python,
        Language::TypeScript,
        Language::JavaScript,
        Language::Go,
        Language::Java,
        Language::Rust,
        Language::C,
        Language::Cpp,
        Language::Ruby,
        Language::Kotlin,
        Language::Swift,
        Language::CSharp,
        Language::Scala,
        Language::Php,
        Language::Lua,
        Language::Luau,
        Language::Elixir,
        Language::Ocaml,
    ];

    for lang in every_language {
        let needles = crate::security::taint::fastpath_pattern_strings(lang);
        let populated = !needles.is_empty();
        assert!(
            populated,
            "FAST-PATH-NEEDLES-ALL-LANGS: needle set empty for {:?} — prefilter would skip every function and produce false-negatives.",
            lang
        );
    }
}
}