use crate::security::taint::types::TaintLabel;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// How a [`TaintSource`] pattern is compared against an input string.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum MatchStrategy {
/// The input must be equal to the pattern.
Exact,
/// The input must start with the pattern.
Prefix,
/// The input must end with the pattern.
Suffix,
/// The pattern must occur somewhere inside the input.
Contains,
/// The pattern is a regular expression that is searched for in the input.
Regex,
}
/// A single pattern that marks an expression as a source of tainted data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintSource {
/// Text (or regular expression, for `MatchStrategy::Regex`) compared against the input.
pub pattern: String,
/// Taint label attached to inputs that match this source.
pub label: TaintLabel,
/// How `pattern` is compared against the input.
pub strategy: MatchStrategy,
/// Human-readable description of the source.
pub description: String,
/// Confidence score. Each constructor assigns a default, and `with_confidence`
/// clamps the value it is given to `0.0..=1.0`.
pub confidence: f64,
/// Optional context string set through `with_context`.
/// NOTE(review): nothing in this file reads this field; confirm how consumers use it.
pub requires_context: Option<String>,
/// Regex compiled by the `regex` constructor. Skipped by serde, so it is `None`
/// on a deserialized value; `matches` then compiles `pattern` on demand.
#[serde(skip)]
compiled_regex: Option<Regex>,
}
impl TaintSource {
pub fn exact(pattern: impl Into<String>, label: TaintLabel, description: impl Into<String>) -> Self {
Self {
pattern: pattern.into(),
label,
strategy: MatchStrategy::Exact,
description: description.into(),
confidence: 1.0,
requires_context: None,
compiled_regex: None,
}
}
pub fn prefix(pattern: impl Into<String>, label: TaintLabel, description: impl Into<String>) -> Self {
Self {
pattern: pattern.into(),
label,
strategy: MatchStrategy::Prefix,
description: description.into(),
confidence: 0.9,
requires_context: None,
compiled_regex: None,
}
}
pub fn contains(pattern: impl Into<String>, label: TaintLabel, description: impl Into<String>) -> Self {
Self {
pattern: pattern.into(),
label,
strategy: MatchStrategy::Contains,
description: description.into(),
confidence: 0.8,
requires_context: None,
compiled_regex: None,
}
}
pub fn suffix(pattern: impl Into<String>, label: TaintLabel, description: impl Into<String>) -> Self {
Self {
pattern: pattern.into(),
label,
strategy: MatchStrategy::Suffix,
description: description.into(),
confidence: 0.85,
requires_context: None,
compiled_regex: None,
}
}
pub fn regex(pattern: impl Into<String>, label: TaintLabel, description: impl Into<String>) -> Self {
let pattern_str = pattern.into();
let compiled = Regex::new(&pattern_str).ok();
Self {
pattern: pattern_str,
label,
strategy: MatchStrategy::Regex,
description: description.into(),
confidence: 0.85,
requires_context: None,
compiled_regex: compiled,
}
}
pub fn with_confidence(mut self, confidence: f64) -> Self {
self.confidence = confidence.clamp(0.0, 1.0);
self
}
#[allow(dead_code)]
pub fn with_context(mut self, context: impl Into<String>) -> Self {
self.requires_context = Some(context.into());
self
}
pub fn matches(&self, input: &str) -> bool {
match self.strategy {
MatchStrategy::Exact => input == self.pattern,
MatchStrategy::Prefix => input.starts_with(&self.pattern),
MatchStrategy::Suffix => input.ends_with(&self.pattern),
MatchStrategy::Contains => input.contains(&self.pattern),
MatchStrategy::Regex => {
if let Some(ref regex) = self.compiled_regex {
regex.is_match(input)
} else {
Regex::new(&self.pattern)
.map(|r| r.is_match(input))
.unwrap_or(false)
}
}
}
}
}
/// Collection of taint sources with a per-label index.
#[derive(Debug, Default)]
pub struct SourceRegistry {
/// Registered sources, in insertion order.
sources: Vec<TaintSource>,
/// For each label, the indices into `sources` of the entries carrying that label.
by_label: HashMap<TaintLabel, Vec<usize>>,
}
impl SourceRegistry {
    /// Creates a registry with no sources.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `source` and records its position under its label.
    pub fn add(&mut self, source: TaintSource) {
        // The new entry will land at the current end of `sources`.
        let position = self.sources.len();
        self.by_label
            .entry(source.label.clone())
            .or_insert_with(Vec::new)
            .push(position);
        self.sources.push(source);
    }

    /// Returns every registered source that matches `input`, in insertion order.
    pub fn find_matches(&self, input: &str) -> Vec<&TaintSource> {
        let mut hits = Vec::new();
        for candidate in &self.sources {
            if candidate.matches(input) {
                hits.push(candidate);
            }
        }
        hits
    }

    /// Returns the sources registered under `label`, or an empty list if there are none.
    #[allow(dead_code)]
    pub fn sources_for_label(&self, label: &TaintLabel) -> Vec<&TaintSource> {
        match self.by_label.get(label) {
            // Positions were recorded by `add`, so each one indexes into `sources`.
            Some(positions) => positions.iter().map(|&at| &self.sources[at]).collect(),
            None => Vec::new(),
        }
    }

    /// Returns all registered sources as a slice, in insertion order.
    pub fn all_sources(&self) -> &[TaintSource] {
        self.sources.as_slice()
    }

    /// Number of registered sources.
    #[allow(dead_code)]
    pub fn len(&self) -> usize {
        self.sources.len()
    }

    /// Returns `true` when no source has been registered.
    pub fn is_empty(&self) -> bool {
        self.sources.is_empty()
    }
}
pub fn get_python_sources() -> SourceRegistry {
let mut registry = SourceRegistry::new();
registry.add(TaintSource::prefix("request.args", TaintLabel::UserInput, "Flask query string"));
registry.add(TaintSource::prefix("request.form", TaintLabel::UserInput, "Flask form data"));
registry.add(TaintSource::prefix("request.data", TaintLabel::UserInput, "Flask raw body"));
registry.add(TaintSource::prefix("request.json", TaintLabel::UserInput, "Flask JSON body"));
registry.add(TaintSource::prefix("request.files", TaintLabel::UserInput, "Flask uploads"));
registry.add(TaintSource::prefix("request.values", TaintLabel::UserInput, "Flask args+form"));
registry.add(TaintSource::prefix("request.headers", TaintLabel::HttpHeader, "Flask headers"));
registry.add(TaintSource::prefix("request.cookies", TaintLabel::Cookie, "Flask cookies"));
registry.add(TaintSource::exact("request.path", TaintLabel::UrlData, "Flask URL path"));
registry.add(TaintSource::exact("request.url", TaintLabel::UrlData, "Flask full URL"));
registry.add(TaintSource::prefix("request.GET", TaintLabel::UserInput, "Django GET params"));
registry.add(TaintSource::prefix("request.POST", TaintLabel::UserInput, "Django POST params"));
registry.add(TaintSource::prefix("request.FILES", TaintLabel::UserInput, "Django uploads"));
registry.add(TaintSource::prefix("request.COOKIES", TaintLabel::Cookie, "Django cookies"));
registry.add(TaintSource::prefix("request.META", TaintLabel::HttpHeader, "Django metadata"));
registry.add(TaintSource::exact("request.body", TaintLabel::UserInput, "Django raw body"));
registry.add(TaintSource::regex(r"\bQuery\(", TaintLabel::UserInput, "FastAPI Query"));
registry.add(TaintSource::regex(r"\bBody\(", TaintLabel::UserInput, "FastAPI Body"));
registry.add(TaintSource::regex(r"\bPath\(", TaintLabel::UrlData, "FastAPI Path"));
registry.add(TaintSource::regex(r"\bHeader\(", TaintLabel::HttpHeader, "FastAPI Header"));
registry.add(TaintSource::regex(r"\bCookie\(", TaintLabel::Cookie, "FastAPI Cookie"));
registry.add(TaintSource::regex(r"\bForm\(", TaintLabel::UserInput, "FastAPI Form"));
registry.add(TaintSource::regex(r"\bFile\(", TaintLabel::UserInput, "FastAPI File"));
registry.add(TaintSource::exact("input", TaintLabel::Stdin, "Python input()"));
registry.add(TaintSource::exact("input()", TaintLabel::Stdin, "Python input()"));
registry.add(TaintSource::exact("sys.stdin", TaintLabel::Stdin, "Standard input"));
registry.add(TaintSource::prefix("sys.stdin.read", TaintLabel::Stdin, "Reading stdin"));
registry.add(TaintSource::exact("sys.argv", TaintLabel::ProcessArgs, "CLI arguments"));
registry.add(TaintSource::prefix("os.environ", TaintLabel::Environment, "Environment dict"));
registry.add(TaintSource::exact("os.getenv", TaintLabel::Environment, "Get env var"));
registry.add(TaintSource::exact("open", TaintLabel::FileContent, "File open").with_confidence(0.7));
registry.add(TaintSource::suffix(".read()", TaintLabel::FileContent, "File read"));
registry.add(TaintSource::suffix(".readline()", TaintLabel::FileContent, "File readline"));
registry.add(TaintSource::suffix(".readlines()", TaintLabel::FileContent, "File readlines"));
registry.add(TaintSource::prefix("requests.get", TaintLabel::NetworkData, "HTTP GET"));
registry.add(TaintSource::prefix("requests.post", TaintLabel::NetworkData, "HTTP POST"));
registry.add(TaintSource::prefix("httpx.", TaintLabel::NetworkData, "HTTPX client"));
registry.add(TaintSource::prefix("aiohttp.", TaintLabel::NetworkData, "AIOHTTP client"));
registry.add(TaintSource::suffix(".fetchone()", TaintLabel::DatabaseQuery, "DB fetch one"));
registry.add(TaintSource::suffix(".fetchall()", TaintLabel::DatabaseQuery, "DB fetch all"));
registry.add(TaintSource::suffix(".fetchmany(", TaintLabel::DatabaseQuery, "DB fetch many"));
registry.add(TaintSource::exact("json.loads", TaintLabel::DeserializedData, "JSON deserialize"));
registry.add(TaintSource::exact("json.load", TaintLabel::DeserializedData, "JSON file deserialize"));
registry.add(TaintSource::exact("pickle.loads", TaintLabel::DeserializedData, "Pickle deserialize"));
registry.add(TaintSource::exact("pickle.load", TaintLabel::DeserializedData, "Pickle file deserialize"));
registry.add(TaintSource::exact("yaml.load", TaintLabel::DeserializedData, "YAML deserialize"));
registry.add(TaintSource::suffix(".json()", TaintLabel::ExternalApi, "Response JSON"));
registry.add(TaintSource::suffix(".text", TaintLabel::ExternalApi, "Response text").with_confidence(0.7));
registry
}
/// Builds the registry of taint sources used for TypeScript and JavaScript code.
///
/// Entries are registered in the order listed below.
pub fn get_typescript_sources() -> SourceRegistry {
    let mut registry = SourceRegistry::new();
    for source in [
        // Express request object
        TaintSource::prefix("req.body", TaintLabel::UserInput, "Express body"),
        TaintSource::prefix("req.query", TaintLabel::UserInput, "Express query"),
        TaintSource::prefix("req.params", TaintLabel::UrlData, "Express params"),
        TaintSource::prefix("req.headers", TaintLabel::HttpHeader, "Express headers"),
        TaintSource::prefix("req.cookies", TaintLabel::Cookie, "Express cookies"),
        TaintSource::exact("req.path", TaintLabel::UrlData, "Express path"),
        TaintSource::exact("req.url", TaintLabel::UrlData, "Express URL"),
        // Fastify request object
        TaintSource::prefix("request.body", TaintLabel::UserInput, "Fastify body"),
        TaintSource::prefix("request.query", TaintLabel::UserInput, "Fastify query"),
        TaintSource::prefix("request.params", TaintLabel::UrlData, "Fastify params"),
        TaintSource::prefix("request.headers", TaintLabel::HttpHeader, "Fastify headers"),
        // Node process
        TaintSource::exact("process.argv", TaintLabel::ProcessArgs, "Node CLI args"),
        TaintSource::prefix("process.env", TaintLabel::Environment, "Node env vars"),
        TaintSource::exact("process.stdin", TaintLabel::Stdin, "Node stdin"),
        // Browser location and cookies
        TaintSource::prefix("document.location", TaintLabel::UrlData, "Browser location"),
        TaintSource::prefix("window.location", TaintLabel::UrlData, "Window location"),
        TaintSource::exact("location.href", TaintLabel::UrlData, "Current URL"),
        TaintSource::exact("location.search", TaintLabel::UrlData, "URL query string"),
        TaintSource::exact("location.hash", TaintLabel::UrlData, "URL hash"),
        TaintSource::prefix("document.cookie", TaintLabel::Cookie, "Document cookies"),
        // DOM and form data
        TaintSource::suffix(".value", TaintLabel::UserInput, "DOM element value").with_confidence(0.6),
        TaintSource::suffix(".innerHTML", TaintLabel::UserInput, "DOM innerHTML").with_confidence(0.5),
        TaintSource::exact("FormData", TaintLabel::UserInput, "FormData object"),
        // HTTP clients
        TaintSource::exact("fetch", TaintLabel::NetworkData, "Fetch API").with_confidence(0.8),
        TaintSource::prefix("axios.", TaintLabel::NetworkData, "Axios HTTP client"),
        TaintSource::exact("XMLHttpRequest", TaintLabel::NetworkData, "XHR response"),
        TaintSource::suffix(".responseText", TaintLabel::NetworkData, "XHR response text"),
        // File access
        TaintSource::exact("fs.readFileSync", TaintLabel::FileContent, "Sync file read"),
        TaintSource::exact("fs.readFile", TaintLabel::FileContent, "Async file read"),
        TaintSource::prefix("fs.promises.readFile", TaintLabel::FileContent, "Promise file read"),
        TaintSource::exact("FileReader", TaintLabel::FileContent, "Browser FileReader"),
        // Parsing and dynamic code
        TaintSource::exact("JSON.parse", TaintLabel::DeserializedData, "JSON parsing"),
        TaintSource::exact("eval", TaintLabel::DeserializedData, "JavaScript eval"),
        TaintSource::regex(r"new\s+Function\(", TaintLabel::DeserializedData, "Dynamic function"),
        // Message events
        TaintSource::suffix(".onmessage", TaintLabel::NetworkData, "WebSocket message").with_confidence(0.7),
        TaintSource::suffix("event.data", TaintLabel::NetworkData, "WebSocket data").with_confidence(0.7),
    ] {
        registry.add(source);
    }
    registry
}
/// Builds the registry of taint sources used for Go code.
///
/// Entries are registered in the order listed below.
///
/// NOTE(review): many patterns here are suffixes ending in `(`. Under
/// `MatchStrategy::Suffix` they only match input that ends right at the opening
/// parenthesis; confirm callers pass that form.
pub fn get_go_sources() -> SourceRegistry {
    let mut registry = SourceRegistry::new();
    for source in [
        // net/http request
        TaintSource::prefix("r.URL.Query", TaintLabel::UserInput, "URL query params"),
        TaintSource::prefix("r.FormValue", TaintLabel::UserInput, "Form value"),
        TaintSource::prefix("r.PostFormValue", TaintLabel::UserInput, "POST form value"),
        TaintSource::prefix("r.Body", TaintLabel::UserInput, "Request body"),
        TaintSource::prefix("r.Header", TaintLabel::HttpHeader, "Request headers"),
        TaintSource::suffix(".Cookie(", TaintLabel::Cookie, "Cookie value"),
        TaintSource::exact("r.URL.Path", TaintLabel::UrlData, "URL path"),
        // Gin
        TaintSource::suffix(".Query(", TaintLabel::UserInput, "Gin query param"),
        TaintSource::suffix(".PostForm(", TaintLabel::UserInput, "Gin POST form"),
        TaintSource::suffix(".Param(", TaintLabel::UrlData, "Gin URL param"),
        TaintSource::suffix(".ShouldBindJSON(", TaintLabel::UserInput, "Gin JSON binding"),
        // Echo
        TaintSource::suffix(".QueryParam(", TaintLabel::UserInput, "Echo query param"),
        TaintSource::suffix(".FormValue(", TaintLabel::UserInput, "Echo form value"),
        TaintSource::suffix(".Bind(", TaintLabel::UserInput, "Echo request binding"),
        // Process arguments, environment and stdin
        TaintSource::exact("os.Args", TaintLabel::ProcessArgs, "CLI arguments"),
        TaintSource::exact("os.Getenv", TaintLabel::Environment, "Environment variable"),
        TaintSource::prefix("os.Stdin", TaintLabel::Stdin, "Standard input"),
        // File and reader access
        TaintSource::exact("os.ReadFile", TaintLabel::FileContent, "Read entire file"),
        TaintSource::exact("ioutil.ReadFile", TaintLabel::FileContent, "Read file (deprecated)"),
        TaintSource::suffix(".Read(", TaintLabel::FileContent, "Read from reader").with_confidence(0.5),
        TaintSource::suffix(".ReadAll(", TaintLabel::FileContent, "Read all from reader"),
        // HTTP clients
        TaintSource::exact("http.Get", TaintLabel::NetworkData, "HTTP GET"),
        TaintSource::exact("http.Post", TaintLabel::NetworkData, "HTTP POST"),
        TaintSource::suffix("Client.Do(", TaintLabel::NetworkData, "HTTP client request"),
        // Deserialization
        TaintSource::suffix("json.Unmarshal(", TaintLabel::DeserializedData, "JSON unmarshal"),
        TaintSource::suffix("json.Decode(", TaintLabel::DeserializedData, "JSON decode"),
        TaintSource::suffix("xml.Unmarshal(", TaintLabel::DeserializedData, "XML unmarshal"),
        TaintSource::suffix("gob.Decode(", TaintLabel::DeserializedData, "Gob decode"),
        // Database rows
        TaintSource::suffix(".Scan(", TaintLabel::DatabaseQuery, "Database row scan"),
        TaintSource::suffix(".QueryRow(", TaintLabel::DatabaseQuery, "Single row query"),
    ] {
        registry.add(source);
    }
    registry
}
/// Builds the registry of taint sources used for Rust code.
///
/// Entries are registered in the order listed below.
///
/// NOTE(review): several patterns here are suffixes ending in `(`. Under
/// `MatchStrategy::Suffix` they only match input that ends right at the opening
/// parenthesis; confirm callers pass that form.
pub fn get_rust_sources() -> SourceRegistry {
    let mut registry = SourceRegistry::new();
    for source in [
        // Axum extractors
        TaintSource::exact("Query", TaintLabel::UserInput, "Axum query params").with_confidence(0.8),
        TaintSource::exact("Form", TaintLabel::UserInput, "Axum form data").with_confidence(0.8),
        TaintSource::exact("Json", TaintLabel::UserInput, "Axum JSON body").with_confidence(0.8),
        TaintSource::exact("Path", TaintLabel::UrlData, "Axum path params").with_confidence(0.8),
        TaintSource::exact("TypedHeader", TaintLabel::HttpHeader, "Axum typed header"),
        // Actix extractors
        TaintSource::exact("web::Query", TaintLabel::UserInput, "Actix query params"),
        TaintSource::exact("web::Form", TaintLabel::UserInput, "Actix form data"),
        TaintSource::exact("web::Json", TaintLabel::UserInput, "Actix JSON body"),
        TaintSource::exact("web::Path", TaintLabel::UrlData, "Actix path params"),
        TaintSource::prefix("req.headers()", TaintLabel::HttpHeader, "Actix headers"),
        // Process arguments, environment and stdin
        TaintSource::exact("std::env::args", TaintLabel::ProcessArgs, "CLI arguments"),
        TaintSource::exact("std::env::var", TaintLabel::Environment, "Environment variable"),
        TaintSource::exact("std::io::stdin", TaintLabel::Stdin, "Standard input"),
        // File access
        TaintSource::exact("std::fs::read", TaintLabel::FileContent, "Read file to bytes"),
        TaintSource::exact("std::fs::read_to_string", TaintLabel::FileContent, "Read file to string"),
        TaintSource::suffix(".read_to_string(", TaintLabel::FileContent, "Read to string method"),
        TaintSource::suffix(".read_to_end(", TaintLabel::FileContent, "Read to end method"),
        // HTTP client
        TaintSource::prefix("reqwest::", TaintLabel::NetworkData, "Reqwest HTTP client"),
        TaintSource::suffix(".text().await", TaintLabel::NetworkData, "Response text"),
        TaintSource::suffix(".json().await", TaintLabel::NetworkData, "Response JSON"),
        // Deserialization
        TaintSource::prefix("serde_json::from_", TaintLabel::DeserializedData, "JSON deserialize"),
        TaintSource::prefix("serde_yaml::from_", TaintLabel::DeserializedData, "YAML deserialize"),
        TaintSource::prefix("toml::from_", TaintLabel::DeserializedData, "TOML deserialize"),
        // Database queries
        TaintSource::suffix(".fetch_one(", TaintLabel::DatabaseQuery, "DB fetch one"),
        TaintSource::suffix(".fetch_all(", TaintLabel::DatabaseQuery, "DB fetch all"),
        TaintSource::suffix(".fetch_optional(", TaintLabel::DatabaseQuery, "DB fetch optional"),
    ] {
        registry.add(source);
    }
    registry
}
pub fn get_sources_for_language(language: &str) -> SourceRegistry {
match language.to_lowercase().as_str() {
"python" | "py" => get_python_sources(),
"typescript" | "ts" | "javascript" | "js" | "tsx" | "jsx" => get_typescript_sources(),
"go" | "golang" => get_go_sources(),
"rust" | "rs" => get_rust_sources(),
_ => SourceRegistry::new(),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when at least one matched source carries `label`.
    fn has_label(matches: &[&TaintSource], label: TaintLabel) -> bool {
        matches.iter().any(|source| source.label == label)
    }

    #[test]
    fn test_python_flask_sources() {
        let hits = get_python_sources().find_matches("request.args.get('id')");
        assert!(!hits.is_empty());
        assert!(has_label(&hits, TaintLabel::UserInput));
    }

    #[test]
    fn test_python_stdlib_sources() {
        let registry = get_python_sources();

        let stdin_hits = registry.find_matches("input()");
        assert!(!stdin_hits.is_empty());
        assert!(has_label(&stdin_hits, TaintLabel::Stdin));

        let argv_hits = registry.find_matches("sys.argv");
        assert!(!argv_hits.is_empty());
        assert!(has_label(&argv_hits, TaintLabel::ProcessArgs));
    }

    #[test]
    fn test_typescript_express_sources() {
        let hits = get_typescript_sources().find_matches("req.body.username");
        assert!(!hits.is_empty());
        assert!(has_label(&hits, TaintLabel::UserInput));
    }

    #[test]
    fn test_match_strategies() {
        // Each case: (source, input that must match, input that must not match).
        let cases = [
            (
                TaintSource::exact("input", TaintLabel::Stdin, "test"),
                "input",
                "input()",
            ),
            (
                TaintSource::prefix("request.args", TaintLabel::UserInput, "test"),
                "request.args.get('id')",
                "args",
            ),
            (
                TaintSource::suffix(".read()", TaintLabel::FileContent, "test"),
                "file.read()",
                "read()x",
            ),
        ];
        for (source, hit, miss) in &cases {
            assert!(source.matches(hit), "expected {:?} to match {:?}", source.pattern, hit);
            assert!(!source.matches(miss), "expected {:?} not to match {:?}", source.pattern, miss);
        }
    }

    #[test]
    fn test_regex_matching() {
        let regex_source = TaintSource::regex(r"\bQuery\(", TaintLabel::UserInput, "test");
        let expectations = [
            ("Query('name')", true),
            ("x = Query(", true),
            ("QueryBuilder", false),
        ];
        for (input, expected) in &expectations {
            assert_eq!(regex_source.matches(input), *expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_get_sources_for_language() {
        assert!(!get_sources_for_language("python").is_empty());
        assert!(!get_sources_for_language("TypeScript").is_empty());
        assert!(get_sources_for_language("brainfuck").is_empty());
    }
}