mod c;
mod cpp;
mod go;
mod java;
mod javascript;
mod php;
mod python;
pub(crate) mod ruby;
mod rust;
mod typescript;
use bitflags::bitflags;
use once_cell::sync::Lazy;
use phf::Map;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use std::collections::HashMap;
/// A built-in labelling rule: when any of `matchers` matches a callee text,
/// the text is classified as `label`.
#[derive(Debug, Clone, Copy)]
pub struct LabelRule {
/// Matcher strings. As consumed by `classify` / `classify_all`: a matcher
/// ending in `_` is tried as a prefix; any other matcher is tried as a suffix.
/// A leading `=` is stripped before comparison and, for suffix matchers,
/// requires the whole text to match.
pub matchers: &'static [&'static str],
/// Label reported when one of the matchers hits.
pub label: DataLabel,
/// When `false`, comparison ignores ASCII case.
pub case_sensitive: bool,
}
/// Payload-argument list holding the single sentinel index `usize::MAX`.
/// `classify_gated_sink` reports it when a gate's activation value is not a
/// known constant. NOTE(review): presumably consumers read the sentinel as
/// "every argument is payload" — confirm against the callers.
pub const ALL_ARGS_PAYLOAD: &[usize] = &[usize::MAX];
/// How a [`SinkGate`] decides to fire once its callee matcher has matched.
#[derive(Debug, Clone, Copy)]
pub enum GateActivation {
/// Fire depending on the value of an argument / keyword argument
/// (see `classify_gated_sink`).
ValueMatch,
/// Fire on the callee match alone; the listed field names are copied into
/// the resulting `GateMatch::object_destination_fields`.
Destination {
object_destination_fields: &'static [&'static str],
},
}
/// A sink whose label only applies under certain call-site conditions.
/// Evaluated by `classify_gated_sink`.
#[derive(Debug, Clone, Copy)]
pub struct SinkGate {
/// Suffix matcher applied to the callee text and to its chain-normalized form.
pub callee_matcher: &'static str,
/// Positional argument whose constant value is inspected when
/// `keyword_name` is `None`.
pub arg_index: usize,
/// Activation values (compared ignoring ASCII case) that make the gate fire.
pub dangerous_values: &'static [&'static str],
/// Activation-value prefixes (compared ignoring ASCII case) that make the gate fire.
pub dangerous_prefixes: &'static [&'static str],
/// Label reported when the gate fires.
pub label: DataLabel,
/// Whether `callee_matcher` is compared case-sensitively.
pub case_sensitive: bool,
/// Argument positions reported in the `GateMatch` when the gate fires on a
/// constant value or on a `Destination` activation.
pub payload_args: &'static [usize],
/// When `Some`, the activation value is read from this keyword argument
/// instead of the positional `arg_index`.
pub keyword_name: Option<&'static str>,
/// `(keyword, dangerous values)` pairs; only consulted when non-empty and
/// `keyword_name` is `None`.
pub dangerous_kwargs: &'static [(&'static str, &'static [&'static str])],
/// Activation mode; see [`GateActivation`].
pub activation: GateActivation,
}
// Capability bit set backed by a `u16`. Each constant below occupies one
// distinct bit (bits 0 through 13); bits 14 and 15 are unassigned.
// NOTE(review): the meaning of each capability is inferred from its name only;
// the semantics are defined by the rule tables in the language sub-modules.
bitflags! {
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Cap: u16 {
const ENV_VAR = 0b0000_0000_0000_0001; const HTML_ESCAPE = 0b0000_0000_0000_0010; const SHELL_ESCAPE = 0b0000_0000_0000_0100; const URL_ENCODE = 0b0000_0000_0000_1000; const JSON_PARSE = 0b0000_0000_0001_0000; const FILE_IO = 0b0000_0000_0010_0000; const FMT_STRING = 0b0000_0000_0100_0000; const SQL_QUERY = 0b0000_0000_1000_0000; const DESERIALIZE = 0b0000_0001_0000_0000; const SSRF = 0b0000_0010_0000_0000; const CODE_EXEC = 0b0000_0100_0000_0000; const CRYPTO = 0b0000_1000_0000_0000; const UNAUTHORIZED_ID = 0b0001_0000_0000_0000; const DATA_EXFIL = 0b0010_0000_0000_0000; }
}
impl Default for Cap {
fn default() -> Self {
Cap::empty()
}
}
/// Serializes a capability set as its raw `u16` bit pattern.
impl serde::Serialize for Cap {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let raw_bits: u16 = self.bits();
        serializer.serialize_u16(raw_bits)
    }
}
/// Deserializes a capability set from a raw `u16`; bits that do not correspond
/// to a defined flag are silently dropped.
impl<'de> serde::Deserialize<'de> for Cap {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        u16::deserialize(deserializer).map(Cap::from_bits_truncate)
    }
}
/// Coarse syntactic category produced by `lookup` from a per-language table.
/// `Other` is what `lookup` returns when the language or the raw key is unknown.
/// NOTE(review): the meaning of the individual variants is defined by the
/// per-language `KINDS` tables, which are not visible here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Kind {
If,
InfiniteLoop,
While,
For,
CallFn,
CallMethod,
CallMacro,
Break,
Continue,
Return,
Block,
SourceFile,
Function,
Assignment,
CallWrapper,
Try,
Throw,
Switch,
Trivia,
Seq,
Other,
}
/// Role assigned to a callee by the classifier, together with the capability
/// bits the role applies to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum DataLabel {
/// The callee is a source for the given capabilities.
Source(Cap),
/// The callee is a sanitizer for the given capabilities.
Sanitizer(Cap),
/// The callee is a sink for the given capabilities.
Sink(Cap),
}
/// Per-language description of how function parameters are located.
/// NOTE(review): nothing in this file reads these fields; the field meanings
/// below are inferred from their names and the default values — confirm
/// against the code that consumes `param_config`.
pub struct ParamConfig {
// Presumably the name of the field holding a function's parameter list.
pub params_field: &'static str,
// Presumably the node kinds that count as a parameter.
pub param_node_kinds: &'static [&'static str],
// Presumably the node kinds that denote a self/receiver parameter.
pub self_param_kinds: &'static [&'static str],
// Presumably the fields searched for the parameter's identifier.
pub ident_fields: &'static [&'static str],
}
/// Fallback configuration returned by `param_config` when the requested
/// language has no entry in `PARAM_CONFIGS`.
static DEFAULT_PARAM_CONFIG: ParamConfig = ParamConfig {
params_field: "parameters",
param_node_kinds: &["parameter", "identifier"],
self_param_kinds: &[],
ident_fields: &["name", "pattern"],
};
/// Entry of a per-language argument-propagation table, looked up by
/// `arg_propagation`.
pub struct ArgPropagation {
/// Callee name; compared (ignoring ASCII case) against the last `::` / `.`
/// segment of the queried callee.
pub callee: &'static str,
// NOTE(review): presumably the argument positions data flows from — confirm.
pub from_args: &'static [usize],
// NOTE(review): presumably the argument positions data flows into — confirm.
pub to_args: &'static [usize],
}
/// Looks up the output-parameter positions registered for `callee` in the
/// C / C++ tables.
///
/// `lang` must be `"c"`, `"cpp"` or the `"c++"` alias (accepted for consistency
/// with the other registries in this module); any other language yields `None`.
/// Only the last path segment of `callee` (after the final `::`, then after the
/// final `.`) is compared, ignoring ASCII case.
pub fn output_param_source_positions(lang: &str, callee: &str) -> Option<&'static [usize]> {
    let registry: &[(&str, &[usize])] = match lang {
        "c" => c::OUTPUT_PARAM_SOURCES,
        "cpp" | "c++" => cpp::OUTPUT_PARAM_SOURCES,
        _ => return None,
    };
    // Strip any namespace qualifier first, then any receiver chain.
    let unqualified = callee.rsplit_once("::").map_or(callee, |(_, tail)| tail);
    let normalized = unqualified
        .rsplit_once('.')
        .map_or(unqualified, |(_, tail)| tail);
    registry
        .iter()
        .find(|(name, _)| name.eq_ignore_ascii_case(normalized))
        .map(|&(_, positions)| positions)
}
/// Looks up the argument-propagation entry registered for `callee` in the
/// C / C++ tables.
///
/// `lang` must be `"c"`, `"cpp"` or the `"c++"` alias (accepted for consistency
/// with the other registries in this module); any other language yields `None`.
/// Only the last path segment of `callee` (after the final `::`, then after the
/// final `.`) is compared, ignoring ASCII case.
pub fn arg_propagation(lang: &str, callee: &str) -> Option<&'static ArgPropagation> {
    let registry: &'static [ArgPropagation] = match lang {
        "c" => c::ARG_PROPAGATIONS,
        "cpp" | "c++" => cpp::ARG_PROPAGATIONS,
        _ => return None,
    };
    // Strip any namespace qualifier first, then any receiver chain.
    let unqualified = callee.rsplit_once("::").map_or(callee, |(_, tail)| tail);
    let normalized = unqualified
        .rsplit_once('.')
        .map_or(unqualified, |(_, tail)| tail);
    registry
        .iter()
        .find(|entry| entry.callee.eq_ignore_ascii_case(normalized))
}
/// Built-in label rules keyed by language slug; short aliases (`rs`, `js`,
/// `ts`, `py`, `c++`, `rb`) point at the same table as the full name.
static REGISTRY: Lazy<HashMap<&'static str, &'static [LabelRule]>> = Lazy::new(|| {
    HashMap::from([
        ("rust", rust::RULES),
        ("rs", rust::RULES),
        ("javascript", javascript::RULES),
        ("js", javascript::RULES),
        ("typescript", typescript::RULES),
        ("ts", typescript::RULES),
        ("python", python::RULES),
        ("py", python::RULES),
        ("go", go::RULES),
        ("java", java::RULES),
        ("c", c::RULES),
        ("cpp", cpp::RULES),
        ("c++", cpp::RULES),
        ("php", php::RULES),
        ("ruby", ruby::RULES),
        ("rb", ruby::RULES),
    ])
});
/// Gated-sink tables keyed by language slug. Only the languages listed here
/// have gated sinks; lookups for any other slug miss.
static GATED_REGISTRY: Lazy<HashMap<&'static str, &'static [SinkGate]>> = Lazy::new(|| {
    HashMap::from([
        ("javascript", javascript::GATED_SINKS),
        ("js", javascript::GATED_SINKS),
        ("typescript", typescript::GATED_SINKS),
        ("ts", typescript::GATED_SINKS),
        ("python", python::GATED_SINKS),
        ("py", python::GATED_SINKS),
        ("go", go::GATED_SINKS),
        ("php", php::GATED_SINKS),
        ("c", c::GATED_SINKS),
        ("cpp", cpp::GATED_SINKS),
        ("c++", cpp::GATED_SINKS),
    ])
});
/// Per-language exclusion patterns consulted by `is_excluded`; only
/// JavaScript and TypeScript (and their short aliases) define any.
static EXCLUDES: Lazy<HashMap<&'static str, &'static [&'static str]>> = Lazy::new(|| {
    HashMap::from([
        ("javascript", javascript::EXCLUDES),
        ("js", javascript::EXCLUDES),
        ("typescript", typescript::EXCLUDES),
        ("ts", typescript::EXCLUDES),
    ])
});
/// Returns `true` when `trimmed` suffix-matches (ignoring ASCII case) one of
/// the exclusion patterns registered for `lang`.
///
/// The language is looked up verbatim first and, on a miss, again in ASCII
/// lower case. Languages without an exclusion table never exclude anything.
pub(crate) fn is_excluded(lang: &str, trimmed: &[u8]) -> bool {
    let table = EXCLUDES.get(lang).or_else(|| {
        let lowered = lang.to_ascii_lowercase();
        EXCLUDES.get(lowered.as_str())
    });
    match table {
        Some(patterns) => patterns
            .iter()
            .any(|pattern| match_suffix_cs(trimmed, pattern.as_bytes(), false)),
        None => false,
    }
}
/// Shorthand for a static `phf` map from a raw string key to its [`Kind`];
/// the value type stored in `CLASSIFIERS`.
type FastMap = &'static Map<&'static str, Kind>;
/// Per-language `Kind` tables keyed by language slug; short aliases share the
/// table of the full language name.
pub(crate) static CLASSIFIERS: Lazy<HashMap<&'static str, FastMap>> = Lazy::new(|| {
    HashMap::from([
        ("rust", &rust::KINDS),
        ("rs", &rust::KINDS),
        ("javascript", &javascript::KINDS),
        ("js", &javascript::KINDS),
        ("typescript", &typescript::KINDS),
        ("ts", &typescript::KINDS),
        ("python", &python::KINDS),
        ("py", &python::KINDS),
        ("go", &go::KINDS),
        ("java", &java::KINDS),
        ("c", &c::KINDS),
        ("cpp", &cpp::KINDS),
        ("c++", &cpp::KINDS),
        ("php", &php::KINDS),
        ("ruby", &ruby::KINDS),
        ("rb", &ruby::KINDS),
    ])
});
/// Per-language parameter configurations keyed by language slug; short aliases
/// share the configuration of the full language name.
static PARAM_CONFIGS: Lazy<HashMap<&'static str, &'static ParamConfig>> = Lazy::new(|| {
    HashMap::from([
        ("rust", &rust::PARAM_CONFIG),
        ("rs", &rust::PARAM_CONFIG),
        ("javascript", &javascript::PARAM_CONFIG),
        ("js", &javascript::PARAM_CONFIG),
        ("typescript", &typescript::PARAM_CONFIG),
        ("ts", &typescript::PARAM_CONFIG),
        ("python", &python::PARAM_CONFIG),
        ("py", &python::PARAM_CONFIG),
        ("go", &go::PARAM_CONFIG),
        ("java", &java::PARAM_CONFIG),
        ("c", &c::PARAM_CONFIG),
        ("cpp", &cpp::PARAM_CONFIG),
        ("c++", &cpp::PARAM_CONFIG),
        ("php", &php::PARAM_CONFIG),
        ("ruby", &ruby::PARAM_CONFIG),
        ("rb", &ruby::PARAM_CONFIG),
    ])
});
/// Returns the parameter configuration registered for `lang`, or
/// `DEFAULT_PARAM_CONFIG` when the slug is not registered. The lookup is an
/// exact, case-sensitive match on the slug.
pub fn param_config(lang: &str) -> &'static ParamConfig {
    match PARAM_CONFIGS.get(lang) {
        Some(&registered) => registered,
        None => &DEFAULT_PARAM_CONFIG,
    }
}
/// Parameter names that are recognised outright (compared ignoring ASCII case).
const JS_TS_HANDLER_PARAM_NAMES: &[&str] = &["userinput", "userid", "payload", "cmd", "input"];

/// Returns `true` when `name` is one of the recognised handler parameter names.
///
/// A name qualifies when it is ASCII, non-empty, and either
/// * equals one of `JS_TS_HANDLER_PARAM_NAMES` ignoring ASCII case, or
/// * starts with `user` (ignoring ASCII case) immediately followed by an
///   ASCII uppercase letter or `_` (e.g. `userCmd`, `user_cmd`).
pub fn is_js_ts_handler_param_name(name: &str) -> bool {
    if name.is_empty() || !name.is_ascii() {
        return false;
    }

    let listed = JS_TS_HANDLER_PARAM_NAMES
        .iter()
        .any(|known| name.eq_ignore_ascii_case(known));
    if listed {
        return true;
    }

    // `user` + separator: the fifth byte decides whether a new word starts.
    let raw = name.as_bytes();
    match raw.get(4) {
        Some(&separator) => {
            raw[..4].eq_ignore_ascii_case(b"user")
                && (separator == b'_' || separator.is_ascii_uppercase())
        }
        None => false,
    }
}
/// Maps the raw key `raw` to its [`Kind`] for language `lang`.
///
/// Returns `Kind::Other` when the language slug is not registered in
/// `CLASSIFIERS` or the key is absent from that language's table.
#[inline(always)]
pub fn lookup(lang: &str, raw: &str) -> Kind {
    match CLASSIFIERS.get(lang) {
        Some(kinds) => kinds.get(raw).copied().unwrap_or(Kind::Other),
        None => Kind::Other,
    }
}
/// Origin category of a source, as produced by `infer_source_kind`.
/// Serialized with `snake_case` variant names (e.g. `user_input`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SourceKind {
UserInput,
Cookie,
Header,
EnvironmentConfig,
FileSystem,
Database,
// Never returned by `infer_source_kind` in this file.
CaughtException,
// Fallback of `infer_source_kind` when no heuristic matches.
Unknown,
}
/// Sensitivity tier of a source. The derived ordering follows declaration
/// order: `Plain < Sensitive < Secret`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Sensitivity {
Plain,
Sensitive,
// Not produced by `SourceKind::sensitivity` in this file.
Secret,
}
impl SourceKind {
    /// Sensitivity tier of this source kind: `UserInput` is `Plain`, every
    /// other kind is `Sensitive`.
    pub fn sensitivity(self) -> Sensitivity {
        // Variants are listed explicitly so adding one forces a decision here.
        match self {
            Self::UserInput => Sensitivity::Plain,
            Self::Cookie
            | Self::Header
            | Self::EnvironmentConfig
            | Self::FileSystem
            | Self::Database
            | Self::CaughtException
            | Self::Unknown => Sensitivity::Sensitive,
        }
    }
}
/// Guesses the [`SourceKind`] of a source from its callee text.
///
/// The callee is lower-cased (ASCII) and tested against substring tables in a
/// fixed order; the first table that hits decides the result:
/// cookie/session, header, user input, environment/config, file system (only
/// when `caps` contains `Cap::FILE_IO`), database. `Unknown` is returned when
/// nothing hits.
pub fn infer_source_kind(caps: Cap, callee: &str) -> SourceKind {
    const COOKIE_NEEDLES: &[&str] = &["cookie", "session"];
    const HEADER_NEEDLES: &[&str] = &["header"];
    const USER_INPUT_NEEDLES: &[&str] = &[
        "argv",
        "stdin",
        "request",
        "form",
        "query",
        "params",
        "param",
        "input",
        "body",
        "location",
        "document.url",
        "document.referrer",
    ];
    // These must equal the whole (lower-cased) callee.
    const USER_INPUT_EXACT: &[&str] = &["$_get", "$_post", "$_files", "_get", "_post", "_files"];
    const ENV_NEEDLES: &[&str] = &["env", "getenv", "environ", "config"];
    const FILE_NEEDLES: &[&str] = &["read", "fopen", "open"];
    // "query" can never hit here: it is already claimed by USER_INPUT_NEEDLES.
    const DATABASE_NEEDLES: &[&str] = &["fetchone", "fetchall", "fetch_row", "query", "execute"];

    let lowered = callee.to_ascii_lowercase();
    let mentions = |needles: &[&str]| needles.iter().any(|needle| lowered.contains(*needle));

    if mentions(COOKIE_NEEDLES) {
        SourceKind::Cookie
    } else if mentions(HEADER_NEEDLES) {
        SourceKind::Header
    } else if mentions(USER_INPUT_NEEDLES) || USER_INPUT_EXACT.contains(&lowered.as_str()) {
        SourceKind::UserInput
    } else if mentions(ENV_NEEDLES) {
        SourceKind::EnvironmentConfig
    } else if mentions(FILE_NEEDLES) && caps.contains(Cap::FILE_IO) {
        SourceKind::FileSystem
    } else if mentions(DATABASE_NEEDLES) {
        SourceKind::Database
    } else {
        SourceKind::Unknown
    }
}
pub fn severity_for_source_kind(kind: SourceKind) -> crate::patterns::Severity {
match kind {
SourceKind::UserInput => crate::patterns::Severity::High,
SourceKind::Cookie => crate::patterns::Severity::High,
SourceKind::Header => crate::patterns::Severity::High,
SourceKind::EnvironmentConfig => crate::patterns::Severity::High,
SourceKind::FileSystem => crate::patterns::Severity::Medium,
SourceKind::Database => crate::patterns::Severity::Medium,
SourceKind::CaughtException => crate::patterns::Severity::Medium,
SourceKind::Unknown => crate::patterns::Severity::High,
}
}
/// Label rule built at run time (from configuration or framework detection,
/// see `build_lang_rules`); the owned counterpart of [`LabelRule`].
#[derive(Debug, Clone)]
pub struct RuntimeLabelRule {
/// Matcher strings, interpreted by `classify` exactly like the built-in
/// ones (trailing `_` = prefix matcher, otherwise suffix matcher).
pub matchers: Vec<String>,
/// Label reported when one of the matchers hits.
pub label: DataLabel,
/// When `false`, comparison ignores ASCII case.
pub case_sensitive: bool,
}
/// Parses a capability name (ASCII case-insensitive) into a [`Cap`].
///
/// Accepts the snake_case name of every single flag, the alias
/// `data_exfiltration` for `DATA_EXFIL`, and `all` for every flag combined.
/// Returns `None` for anything else.
// NOTE(review): `dead_code` is allowed presumably because some build
// configurations do not call this function — confirm.
#[allow(dead_code)]
pub fn parse_cap(s: &str) -> Option<Cap> {
    const SINGLE_FLAGS: &[(&str, Cap)] = &[
        ("env_var", Cap::ENV_VAR),
        ("html_escape", Cap::HTML_ESCAPE),
        ("shell_escape", Cap::SHELL_ESCAPE),
        ("url_encode", Cap::URL_ENCODE),
        ("json_parse", Cap::JSON_PARSE),
        ("file_io", Cap::FILE_IO),
        ("fmt_string", Cap::FMT_STRING),
        ("sql_query", Cap::SQL_QUERY),
        ("deserialize", Cap::DESERIALIZE),
        ("ssrf", Cap::SSRF),
        ("code_exec", Cap::CODE_EXEC),
        ("crypto", Cap::CRYPTO),
        ("unauthorized_id", Cap::UNAUTHORIZED_ID),
        ("data_exfil", Cap::DATA_EXFIL),
        ("data_exfiltration", Cap::DATA_EXFIL),
    ];

    if s.eq_ignore_ascii_case("all") {
        return Some(Cap::all());
    }
    SINGLE_FLAGS
        .iter()
        .find(|(name, _)| name.eq_ignore_ascii_case(s))
        .map(|&(_, cap)| cap)
}
/// Run-time analysis rules for one language, assembled by `build_lang_rules`.
#[derive(Debug, Clone, Default)]
pub struct LangAnalysisRules {
/// Extra label rules: configured rules first, then framework rules, then
/// (when enabled) the auth-as-taint rules.
pub extra_labels: Vec<RuntimeLabelRule>,
/// Copied from the language's configuration entry; empty when there is none.
pub terminators: Vec<String>,
/// Copied from the language's configuration entry; empty when there is none.
pub event_handlers: Vec<String>,
/// Frameworks copied from the configuration's framework context, if any.
pub frameworks: Vec<crate::utils::project::DetectedFramework>,
}
pub fn build_lang_rules(
config: &crate::utils::config::Config,
lang_slug: &str,
) -> LangAnalysisRules {
let mut extra_labels: Vec<RuntimeLabelRule> = Vec::new();
let mut terminators = Vec::new();
let mut event_handlers = Vec::new();
if let Some(lang_cfg) = config.analysis.languages.get(lang_slug) {
extra_labels.extend(lang_cfg.rules.iter().map(|r| {
use crate::utils::config::RuleKind;
let cap = r.cap.to_cap();
let label = match r.kind {
RuleKind::Source => DataLabel::Source(cap),
RuleKind::Sanitizer => DataLabel::Sanitizer(cap),
RuleKind::Sink => DataLabel::Sink(cap),
};
RuntimeLabelRule {
matchers: r.matchers.clone(),
label,
case_sensitive: r.case_sensitive,
}
}));
terminators = lang_cfg.terminators.clone();
event_handlers = lang_cfg.event_handlers.clone();
}
let frameworks = if let Some(ref fw_ctx) = config.framework_ctx {
extra_labels.extend(framework_rules_for_lang(lang_slug, fw_ctx));
fw_ctx.frameworks.clone()
} else {
Vec::new()
};
if config.scanner.enable_auth_as_taint {
extra_labels.extend(phase_c_auth_rules_for_lang(lang_slug));
}
LangAnalysisRules {
extra_labels,
terminators,
event_handlers,
frameworks,
}
}
fn phase_c_auth_rules_for_lang(lang_slug: &str) -> Vec<RuntimeLabelRule> {
match lang_slug {
"rust" | "rs" => rust::phase_c_auth_rules(),
_ => Vec::new(),
}
}
/// Public entry point that forwards unchanged to the private
/// `framework_rules_for_lang`.
pub fn framework_rules_for_lang_pub(
lang_slug: &str,
ctx: &crate::utils::project::FrameworkContext,
) -> Vec<RuntimeLabelRule> {
framework_rules_for_lang(lang_slug, ctx)
}
fn framework_rules_for_lang(
lang_slug: &str,
ctx: &crate::utils::project::FrameworkContext,
) -> Vec<RuntimeLabelRule> {
match lang_slug {
"go" => go::framework_rules(ctx),
"ruby" | "rb" => ruby::framework_rules(ctx),
"java" => java::framework_rules(ctx),
"php" => php::framework_rules(ctx),
"python" | "py" => python::framework_rules(ctx),
"rust" | "rs" => rust::framework_rules(ctx),
"javascript" | "js" => javascript::framework_rules(ctx),
"typescript" | "ts" => typescript::framework_rules(ctx),
_ => Vec::new(),
}
}
/// Returns `true` when `haystack` ends with `needle`, comparing bytes exactly
/// when `case_sensitive` is set and ignoring ASCII case otherwise.
#[inline]
fn ends_with_cs(haystack: &[u8], needle: &[u8], case_sensitive: bool) -> bool {
    // `checked_sub` fails exactly when the needle is longer than the haystack.
    match haystack.len().checked_sub(needle.len()) {
        None => false,
        Some(offset) => {
            let tail = &haystack[offset..];
            if case_sensitive {
                tail == needle
            } else {
                tail.eq_ignore_ascii_case(needle)
            }
        }
    }
}
/// Returns `true` when `haystack` starts with `needle`, comparing bytes exactly
/// when `case_sensitive` is set and ignoring ASCII case otherwise.
///
/// A leading `=` sigil on `needle` is stripped before comparing; the
/// exact-only flag it carries is ignored for prefix matching.
#[inline]
fn starts_with_cs(haystack: &[u8], needle: &[u8], case_sensitive: bool) -> bool {
    let (prefix, _exact_only) = unpack_matcher(needle);
    match haystack.get(..prefix.len()) {
        // Haystack is shorter than the prefix.
        None => false,
        Some(head) if case_sensitive => head == prefix,
        Some(head) => head.eq_ignore_ascii_case(prefix),
    }
}
/// Suffix-matches `matcher` against `text` at a segment boundary.
///
/// After stripping an optional leading `=` from `matcher`, `text` must end with
/// it. The match is accepted when it covers the whole text, or — unless the
/// `=` sigil was present — when the byte right before the match is `.` or `:`.
#[inline]
fn match_suffix_cs(text: &[u8], matcher: &[u8], case_sensitive: bool) -> bool {
    let (suffix, exact_only) = unpack_matcher(matcher);
    if !ends_with_cs(text, suffix, case_sensitive) {
        return false;
    }
    match text.len() - suffix.len() {
        0 => true,
        _ if exact_only => false,
        boundary => matches!(text[boundary - 1], b'.' | b':'),
    }
}
/// Splits a matcher into its text and its exact-only flag: a leading `=` is
/// removed and reported as `true`; otherwise the matcher is returned unchanged
/// with `false`.
#[inline]
fn unpack_matcher(matcher: &[u8]) -> (&[u8], bool) {
    match matcher.split_first() {
        Some((b'=', rest)) => (rest, true),
        _ => (matcher, false),
    }
}
pub fn classify(lang: &str, text: &str, extra: Option<&[RuntimeLabelRule]>) -> Option<DataLabel> {
let head = text.split(['(', '<']).next().unwrap_or("");
let trimmed = head.trim().as_bytes();
if is_excluded(lang, trimmed) {
return None;
}
let full_normalized = normalize_chained_call(text);
let full_norm_bytes = full_normalized.as_bytes();
if let Some(extras) = extra {
for rule in extras {
for raw in &rule.matchers {
let m = raw.as_bytes();
if m.last() == Some(&b'_') {
continue;
}
if match_suffix_cs(trimmed, m, rule.case_sensitive)
|| match_suffix_cs(full_norm_bytes, m, rule.case_sensitive)
{
return Some(rule.label);
}
}
}
for rule in extras {
for raw in &rule.matchers {
let m = raw.as_bytes();
if m.last() == Some(&b'_')
&& (starts_with_cs(trimmed, m, rule.case_sensitive)
|| starts_with_cs(full_norm_bytes, m, rule.case_sensitive))
{
return Some(rule.label);
}
}
}
}
let rules = REGISTRY.get(lang).or_else(|| {
let key = lang.to_ascii_lowercase();
REGISTRY.get(key.as_str())
})?;
for rule in *rules {
for raw in rule.matchers {
let m = raw.as_bytes();
if m.last() == Some(&b'_') {
continue;
}
if match_suffix_cs(trimmed, m, rule.case_sensitive)
|| match_suffix_cs(full_norm_bytes, m, rule.case_sensitive)
{
return Some(rule.label);
}
}
}
for rule in *rules {
for raw in rule.matchers {
let m = raw.as_bytes();
if m.last() == Some(&b'_')
&& (starts_with_cs(trimmed, m, rule.case_sensitive)
|| starts_with_cs(full_norm_bytes, m, rule.case_sensitive))
{
return Some(rule.label);
}
}
}
None
}
/// Like `classify`, but collects the label of every rule that hits instead of
/// stopping at the first one.
///
/// Labels are de-duplicated and appear in discovery order: `extra` rules by
/// suffix, `extra` rules by prefix, built-in rules by suffix, built-in rules by
/// prefix. An excluded head yields an empty list. An unknown language simply
/// contributes no built-in labels.
pub fn classify_all(
    lang: &str,
    text: &str,
    extra: Option<&[RuntimeLabelRule]>,
) -> SmallVec<[DataLabel; 2]> {
    let trimmed = text
        .split(['(', '<'])
        .next()
        .unwrap_or("")
        .trim()
        .as_bytes();
    if is_excluded(lang, trimmed) {
        return SmallVec::new();
    }
    let full_normalized = normalize_chained_call(text);
    let full_norm_bytes = full_normalized.as_bytes();

    // Suffix matchers are exactly those NOT ending in `_`.
    let suffix_hit = |m: &[u8], case_sensitive: bool| {
        m.last() != Some(&b'_')
            && (match_suffix_cs(trimmed, m, case_sensitive)
                || match_suffix_cs(full_norm_bytes, m, case_sensitive))
    };
    // Prefix matchers are exactly those ending in `_`.
    let prefix_hit = |m: &[u8], case_sensitive: bool| {
        m.last() == Some(&b'_')
            && (starts_with_cs(trimmed, m, case_sensitive)
                || starts_with_cs(full_norm_bytes, m, case_sensitive))
    };

    let mut labels: SmallVec<[DataLabel; 2]> = SmallVec::new();
    let mut record = |label: DataLabel| {
        if !labels.contains(&label) {
            labels.push(label);
        }
    };

    if let Some(extras) = extra {
        for rule in extras {
            if rule
                .matchers
                .iter()
                .any(|raw| suffix_hit(raw.as_bytes(), rule.case_sensitive))
            {
                record(rule.label);
            }
        }
        for rule in extras {
            if rule
                .matchers
                .iter()
                .any(|raw| prefix_hit(raw.as_bytes(), rule.case_sensitive))
            {
                record(rule.label);
            }
        }
    }

    let builtin = REGISTRY.get(lang).or_else(|| {
        let key = lang.to_ascii_lowercase();
        REGISTRY.get(key.as_str())
    });
    if let Some(rules) = builtin {
        for rule in rules.iter() {
            if rule
                .matchers
                .iter()
                .any(|raw| suffix_hit(raw.as_bytes(), rule.case_sensitive))
            {
                record(rule.label);
            }
        }
        for rule in rules.iter() {
            if rule
                .matchers
                .iter()
                .any(|raw| prefix_hit(raw.as_bytes(), rule.case_sensitive))
            {
                record(rule.label);
            }
        }
    }

    labels
}
/// One fired gate, as reported by `classify_gated_sink`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GateMatch {
/// Label of the gate that fired.
pub label: DataLabel,
/// The gate's own `payload_args`, or `ALL_ARGS_PAYLOAD` when the gate
/// fired because its activation value was not a known constant.
pub payload_args: &'static [usize],
/// Field names from a `GateActivation::Destination` gate; empty for
/// value-matched gates.
pub object_destination_fields: &'static [&'static str],
}
pub fn classify_gated_sink(
lang: &str,
callee_text: &str,
const_arg_at: impl Fn(usize) -> Option<String>,
const_keyword_arg: impl Fn(&str) -> Option<String>,
kwarg_present: impl Fn(&str) -> bool,
) -> SmallVec<[GateMatch; 2]> {
let mut out: SmallVec<[GateMatch; 2]> = SmallVec::new();
let gates = match GATED_REGISTRY.get(lang).or_else(|| {
let key = lang.to_ascii_lowercase();
GATED_REGISTRY.get(key.as_str())
}) {
Some(g) => g,
None => return out,
};
let callee_bytes = callee_text.as_bytes();
let normalized = normalize_chained_call(callee_text);
let normalized_bytes = normalized.as_bytes();
for gate in *gates {
let matcher = gate.callee_matcher.as_bytes();
if !match_suffix_cs(callee_bytes, matcher, gate.case_sensitive)
&& !match_suffix_cs(normalized_bytes, matcher, gate.case_sensitive)
{
continue;
}
if let GateActivation::Destination {
object_destination_fields,
} = gate.activation
{
out.push(GateMatch {
label: gate.label,
payload_args: gate.payload_args,
object_destination_fields,
});
continue;
}
if !gate.dangerous_kwargs.is_empty() && gate.keyword_name.is_none() {
let mut any_dangerous = false;
let mut any_dynamic_present = false;
for (name, values) in gate.dangerous_kwargs {
if !kwarg_present(name) {
continue; }
match const_keyword_arg(name) {
Some(v) => {
let lower = v.to_ascii_lowercase();
if values.iter().any(|dv| lower == dv.to_ascii_lowercase()) {
any_dangerous = true;
break;
}
}
None => {
any_dynamic_present = true;
}
}
}
if any_dangerous {
out.push(GateMatch {
label: gate.label,
payload_args: gate.payload_args,
object_destination_fields: &[],
});
continue;
}
if any_dynamic_present {
out.push(GateMatch {
label: gate.label,
payload_args: ALL_ARGS_PAYLOAD,
object_destination_fields: &[],
});
continue;
}
continue; }
let activation_value = if let Some(kw) = gate.keyword_name {
const_keyword_arg(kw)
} else {
const_arg_at(gate.arg_index)
};
match activation_value {
Some(value) => {
let lower = value.to_ascii_lowercase();
let is_dangerous = gate
.dangerous_values
.iter()
.any(|v| lower == v.to_ascii_lowercase())
|| gate
.dangerous_prefixes
.iter()
.any(|p| lower.starts_with(&p.to_ascii_lowercase()));
if is_dangerous {
out.push(GateMatch {
label: gate.label,
payload_args: gate.payload_args,
object_destination_fields: &[],
});
}
}
None => {
out.push(GateMatch {
label: gate.label,
payload_args: ALL_ARGS_PAYLOAD,
object_destination_fields: &[],
});
}
}
}
out
}
/// Public entry point that forwards unchanged to the private
/// `normalize_chained_call`.
pub fn normalize_chained_call_for_classify(text: &str) -> String {
normalize_chained_call(text)
}
/// Returns the part of `callee` after its last `.`, or the whole string when
/// it contains no `.`. A trailing `.` therefore yields the empty string.
pub fn bare_method_name(callee: &str) -> &str {
    match callee.rfind('.') {
        Some(dot) => &callee[dot + 1..],
        None => callee,
    }
}
/// Collapses call-argument lists out of a chained call so that
/// `a(x).b(y)` normalizes to `a.b`.
///
/// Scanning left to right:
/// * a parenthesised group (balanced, or unterminated and running to the end)
///   is dropped when it is followed by `.` or by the end of the text;
/// * a `(` whose group is followed by anything else is kept verbatim;
/// * the first `<` outside a dropped group ends the scan, discarding the rest.
///
/// Text is copied as `&str` slices, so non-ASCII characters are preserved
/// intact. All cut points sit on ASCII delimiters or the end of the text and
/// are therefore valid UTF-8 boundaries.
fn normalize_chained_call(text: &str) -> String {
    let bytes = text.as_bytes();
    let mut result = String::with_capacity(text.len());
    // Start of the run of text that is kept but not yet copied.
    let mut pending_from = 0;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'(' => {
                // Find the index just past the matching `)` (or the end).
                let mut depth = 1u32;
                let mut j = i + 1;
                while j < bytes.len() && depth > 0 {
                    match bytes[j] {
                        b'(' => depth += 1,
                        b')' => depth -= 1,
                        _ => {}
                    }
                    j += 1;
                }
                if j >= bytes.len() || bytes[j] == b'.' {
                    // Drop the whole group: flush what precedes it and resume after it.
                    result.push_str(&text[pending_from..i]);
                    pending_from = j;
                    i = j;
                } else {
                    // Keep the `(`; it stays part of the pending run.
                    i += 1;
                }
            }
            b'<' => break,
            _ => i += 1,
        }
    }
    result.push_str(&text[pending_from..i]);
    result
}
/// Canonical language slugs (no aliases). `enumerate_builtin_rules` iterates
/// this list, so its order determines the order of the enumerated rules.
const CANONICAL_LANGS: &[&str] = &[
"javascript",
"typescript",
"python",
"go",
"java",
"c",
"cpp",
"php",
"ruby",
"rust",
];
/// Resolves a language alias to its canonical slug (`js` → `javascript`,
/// `c++` → `cpp`, …). Canonical slugs and unrecognised slugs are returned
/// unchanged. Matching is exact and case-sensitive.
pub fn canonical_lang(slug: &str) -> &str {
    const ALIASES: &[(&str, &str)] = &[
        ("js", "javascript"),
        ("ts", "typescript"),
        ("py", "python"),
        ("c++", "cpp"),
        ("rb", "ruby"),
        ("rs", "rust"),
    ];
    ALIASES
        .iter()
        .find(|(alias, _)| *alias == slug)
        .map_or(slug, |&(_, canonical)| canonical)
}
/// Returns the snake_case name of a capability set.
///
/// The full set is named `"all"`; a single flag is named after itself, using
/// the same names `parse_cap` accepts (so `DATA_EXFIL` is `"data_exfil"`). The
/// empty set and any other combination of flags are `"unknown"`.
pub fn cap_to_name(cap: Cap) -> &'static str {
    if cap == Cap::all() {
        return "all";
    }
    match cap {
        Cap::ENV_VAR => "env_var",
        Cap::HTML_ESCAPE => "html_escape",
        Cap::SHELL_ESCAPE => "shell_escape",
        Cap::URL_ENCODE => "url_encode",
        Cap::JSON_PARSE => "json_parse",
        Cap::FILE_IO => "file_io",
        Cap::FMT_STRING => "fmt_string",
        Cap::SQL_QUERY => "sql_query",
        Cap::DESERIALIZE => "deserialize",
        Cap::SSRF => "ssrf",
        Cap::CODE_EXEC => "code_exec",
        Cap::CRYPTO => "crypto",
        Cap::UNAUTHORIZED_ID => "unauthorized_id",
        // Previously missing: DATA_EXFIL rules were rendered as "unknown".
        Cap::DATA_EXFIL => "data_exfil",
        _ => "unknown",
    }
}
/// Builds a rule identifier of the form `<lang>.<kind>.<hash8>`.
///
/// `hash8` is the first eight hex digits of the BLAKE3 hash of the matchers,
/// sorted and joined with NUL bytes, so the id does not depend on the order in
/// which the matchers are listed.
pub fn rule_id(lang: &str, kind: &str, matchers: &[&str]) -> String {
    let mut ordered = matchers.to_vec();
    ordered.sort_unstable();
    let fingerprint = blake3::hash(ordered.join("\0").as_bytes()).to_hex();
    let short = &fingerprint[..8];
    format!("{lang}.{kind}.{short}")
}
/// Serializable description of one rule, as produced by
/// `enumerate_builtin_rules`.
#[derive(Debug, Clone, Serialize)]
pub struct RuleInfo {
/// Identifier from `rule_id` (built-in rules use `<lang>.<kind>.<hash8>`).
pub id: String,
/// Display title: first matcher followed by the kind in parentheses.
pub title: String,
/// Canonical language slug the rule belongs to.
pub language: String,
/// `"source"`, `"sanitizer"` or `"sink"`.
pub kind: String,
/// Capability name from `cap_to_name`.
pub cap: String,
/// Raw capability bits.
pub cap_bits: u16,
/// All matcher strings of the rule.
pub matchers: Vec<String>,
pub case_sensitive: bool,
/// `false` for every rule produced by `enumerate_builtin_rules`.
pub is_custom: bool,
/// `true` when the rule comes from the gated-sink registry.
pub is_gated: bool,
/// `true` for every rule produced by `enumerate_builtin_rules`.
pub enabled: bool,
}
pub fn enumerate_builtin_rules() -> Vec<RuleInfo> {
let mut out = Vec::new();
for &lang in CANONICAL_LANGS {
if let Some(rules) = REGISTRY.get(lang) {
for rule in *rules {
let (kind_str, cap) = match rule.label {
DataLabel::Source(c) => ("source", c),
DataLabel::Sanitizer(c) => ("sanitizer", c),
DataLabel::Sink(c) => ("sink", c),
};
let matchers_strs: Vec<&str> = rule.matchers.to_vec();
let id = rule_id(lang, kind_str, &matchers_strs);
let first = rule.matchers.first().copied().unwrap_or("?");
let title = format!("{} ({})", first, kind_str);
out.push(RuleInfo {
id,
title,
language: lang.to_string(),
kind: kind_str.to_string(),
cap: cap_to_name(cap).to_string(),
cap_bits: cap.bits(),
matchers: rule.matchers.iter().map(|s| s.to_string()).collect(),
case_sensitive: rule.case_sensitive,
is_custom: false,
is_gated: false,
enabled: true,
});
}
}
if let Some(gates) = GATED_REGISTRY.get(lang) {
for gate in *gates {
let cap = match gate.label {
DataLabel::Source(c) | DataLabel::Sanitizer(c) | DataLabel::Sink(c) => c,
};
let kind_str = "sink";
let matchers_strs = &[gate.callee_matcher];
let id = rule_id(lang, &format!("gated_{}", kind_str), matchers_strs);
let title = format!("{} (gated {})", gate.callee_matcher, kind_str);
out.push(RuleInfo {
id,
title,
language: lang.to_string(),
kind: kind_str.to_string(),
cap: cap_to_name(cap).to_string(),
cap_bits: cap.bits(),
matchers: vec![gate.callee_matcher.to_string()],
case_sensitive: gate.case_sensitive,
is_custom: false,
is_gated: true,
enabled: true,
});
}
}
}
out
}
/// Identifier for a user-defined rule: the `rule_id` of the same language,
/// kind and matchers, prefixed with `custom.`.
pub fn custom_rule_id(lang: &str, kind: &str, matchers: &[String]) -> String {
    let borrowed: Vec<&str> = matchers.iter().map(String::as_str).collect();
    let base = rule_id(lang, kind, &borrowed);
    format!("custom.{base}")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn bare_method_name_strips_chain() {
assert_eq!(bare_method_name("foo"), "foo");
assert_eq!(bare_method_name("obj.method"), "method");
assert_eq!(bare_method_name("a.b.c.method"), "method");
assert_eq!(bare_method_name("foo."), "");
assert_eq!(bare_method_name(""), "");
assert_eq!(bare_method_name("Lock"), "Lock");
}
#[test]
fn handler_param_names_exact_and_prefix() {
assert!(is_js_ts_handler_param_name("cmd"));
assert!(is_js_ts_handler_param_name("input"));
assert!(is_js_ts_handler_param_name("userId"));
assert!(is_js_ts_handler_param_name("USERID"));
assert!(is_js_ts_handler_param_name("userCmd"));
assert!(is_js_ts_handler_param_name("userData"));
assert!(is_js_ts_handler_param_name("userPath"));
assert!(is_js_ts_handler_param_name("user_cmd"));
assert!(!is_js_ts_handler_param_name("user"));
assert!(!is_js_ts_handler_param_name("userx"));
assert!(!is_js_ts_handler_param_name("url"));
assert!(!is_js_ts_handler_param_name("value"));
}
#[test]
fn classify_none_extra_unchanged() {
let result = classify("javascript", "innerHTML", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
let result = classify("javascript", "myCustomFunc", None);
assert_eq!(result, None);
}
#[test]
fn classify_extra_rules_take_priority() {
let extras = vec![RuntimeLabelRule {
matchers: vec!["escapeHtml".into()],
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
case_sensitive: false,
}];
let result = classify("javascript", "escapeHtml", Some(&extras));
assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
let result = classify("javascript", "innerHTML", Some(&extras));
assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
}
#[test]
fn classify_extra_overrides_builtin() {
let extras = vec![RuntimeLabelRule {
matchers: vec!["innerHTML".into()],
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
case_sensitive: false,
}];
let result = classify("javascript", "innerHTML", Some(&extras));
assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
}
#[test]
fn classify_location_href_is_sink() {
let result = classify("javascript", "location.href", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::URL_ENCODE)));
}
#[test]
fn classify_bare_href_is_none() {
let result = classify("javascript", "href", None);
assert_eq!(result, None);
}
#[test]
fn classify_case_insensitive_is_default() {
let extras = vec![RuntimeLabelRule {
matchers: vec!["myCustomSink".into()],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: false,
}];
let result = classify("javascript", "MYCUSTOMSINK", Some(&extras));
assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
}
#[test]
fn classify_case_sensitive_exact_match() {
let extras = vec![RuntimeLabelRule {
matchers: vec!["MyExactSink".into()],
label: DataLabel::Sink(Cap::HTML_ESCAPE),
case_sensitive: true,
}];
let result = classify("javascript", "MyExactSink", Some(&extras));
assert_eq!(result, Some(DataLabel::Sink(Cap::HTML_ESCAPE)));
let result = classify("javascript", "myexactsink", Some(&extras));
assert_eq!(result, None);
}
#[test]
fn classify_case_sensitive_prefix() {
let extras = vec![RuntimeLabelRule {
matchers: vec!["Sanitize_".into()],
label: DataLabel::Sanitizer(Cap::HTML_ESCAPE),
case_sensitive: true,
}];
let result = classify("javascript", "Sanitize_input", Some(&extras));
assert_eq!(result, Some(DataLabel::Sanitizer(Cap::HTML_ESCAPE)));
let result = classify("javascript", "sanitize_input", Some(&extras));
assert_eq!(result, None);
}
#[test]
fn classify_go_os_remove_is_file_io_sink() {
let result = classify("go", "os.Remove", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::FILE_IO)));
}
#[test]
fn classify_go_os_write_file_is_file_io_sink() {
let result = classify("go", "os.WriteFile", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::FILE_IO)));
}
#[test]
fn classify_go_os_remove_all_is_file_io_sink() {
let result = classify("go", "os.RemoveAll", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::FILE_IO)));
}
#[test]
fn classify_go_goqu_l_is_sql_query_sink() {
let result = classify("go", "goqu.L", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::SQL_QUERY)));
}
#[test]
fn classify_go_goqu_lit_is_sql_query_sink() {
let result = classify("go", "goqu.Lit", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::SQL_QUERY)));
}
#[test]
fn classify_go_goqu_i_is_not_sink() {
let result = classify("go", "goqu.I", None);
assert_eq!(result, None);
}
#[test]
fn classify_go_http_default_client_get_is_ssrf_gate() {
let no_kw = |_: &str| None;
let no_kw_present = |_: &str| false;
let result = classify_gated_sink(
"go",
"http.DefaultClient.Get",
|_| None,
no_kw,
no_kw_present,
);
assert!(
result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
"expected SSRF gate match, got {result:?}"
);
}
#[test]
fn classify_go_http_default_client_post_is_ssrf_and_data_exfil_gate() {
let no_kw = |_: &str| None;
let no_kw_present = |_: &str| false;
let result = classify_gated_sink(
"go",
"http.DefaultClient.Post",
|_| None,
no_kw,
no_kw_present,
);
assert!(
result.iter().any(|m| m.label == DataLabel::Sink(Cap::SSRF)),
"expected SSRF gate match, got {result:?}"
);
assert!(
result
.iter()
.any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
"expected DATA_EXFIL gate match, got {result:?}"
);
}
#[test]
fn classify_go_http_default_client_do_is_data_exfil_gate() {
let no_kw = |_: &str| None;
let no_kw_present = |_: &str| false;
let result = classify_gated_sink(
"go",
"http.DefaultClient.Do",
|_| None,
no_kw,
no_kw_present,
);
assert!(
result
.iter()
.any(|m| m.label == DataLabel::Sink(Cap::DATA_EXFIL)),
"expected DATA_EXFIL gate match, got {result:?}"
);
}
#[test]
fn classify_go_user_client_get_is_not_ssrf_sink() {
let result = classify("go", "client.Get", None);
assert_eq!(result, None);
}
#[test]
fn classify_ruby_bare_open_is_shell_escape_sink() {
let result = classify("ruby", "open", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::SHELL_ESCAPE)));
}
#[test]
fn classify_ruby_file_open_is_not_shell_escape_sink() {
let result = classify_all("ruby", "File.open", None);
assert!(result.contains(&DataLabel::Sink(Cap::FILE_IO)));
assert!(!result.contains(&DataLabel::Sink(Cap::SHELL_ESCAPE)));
}
#[test]
fn classify_ruby_io_open_is_not_shell_escape_sink() {
let result = classify("ruby", "IO.open", None);
assert_ne!(result, Some(DataLabel::Sink(Cap::SHELL_ESCAPE)));
}
#[test]
fn classify_ruby_uri_open_remains_ssrf_sink() {
let result = classify("ruby", "URI.open", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
}
#[test]
fn classify_ruby_openuri_open_uri_is_ssrf_sink() {
let result = classify("ruby", "OpenURI.open_uri", None);
assert_eq!(result, Some(DataLabel::Sink(Cap::SSRF)));
}
#[test]
fn unpack_matcher_strips_exact_sigil() {
let (m, exact) = unpack_matcher(b"=open");
assert_eq!(m, b"open");
assert!(exact);
let (m, exact) = unpack_matcher(b"open");
assert_eq!(m, b"open");
assert!(!exact);
}
#[test]
fn classify_case_sensitive_suffix_boundary() {
let extras = vec![RuntimeLabelRule {
matchers: vec!["RunQuery".into()],
label: DataLabel::Sink(Cap::SQL_QUERY),
case_sensitive: true,
}];
let result = classify("javascript", "db.RunQuery", Some(&extras));
assert_eq!(result, Some(DataLabel::Sink(Cap::SQL_QUERY)));
let result = classify("javascript", "db.runquery", Some(&extras));
assert_eq!(result, None);
}
#[test]
fn classify_cpp_sto_family_is_sanitizer() {
for callee in [
"std::stoi",
"std::stol",
"std::stoll",
"std::stoul",
"std::stoull",
"std::stof",
"std::stod",
"std::stold",
] {
assert_eq!(
classify("cpp", callee, None),
Some(DataLabel::Sanitizer(Cap::all())),
"{callee} should be a Cap::all() sanitizer",
);
}
}
/// Table-driven check of `parse_cap`: each known name maps to its capability,
/// `all` is accepted in either case, and an unknown name yields `None`.
#[test]
fn parse_cap_works() {
    let cases = [
        ("html_escape", Some(Cap::HTML_ESCAPE)),
        ("shell_escape", Some(Cap::SHELL_ESCAPE)),
        ("url_encode", Some(Cap::URL_ENCODE)),
        ("json_parse", Some(Cap::JSON_PARSE)),
        ("env_var", Some(Cap::ENV_VAR)),
        ("file_io", Some(Cap::FILE_IO)),
        ("all", Some(Cap::all())),
        ("ALL", Some(Cap::all())),
        ("sql_query", Some(Cap::SQL_QUERY)),
        ("deserialize", Some(Cap::DESERIALIZE)),
        ("ssrf", Some(Cap::SSRF)),
        ("code_exec", Some(Cap::CODE_EXEC)),
        ("crypto", Some(Cap::CRYPTO)),
        ("invalid", None),
    ];
    for (name, expected) in cases {
        assert_eq!(parse_cap(name), expected, "parse_cap({name:?})");
    }
}
/// Stub callback that returns `None` for any input. The tests pass it as the
/// keyword-value argument of `classify_gated_sink`.
fn no_kw(_keyword: &str) -> Option<String> {
    Option::None
}
/// Stub callback that returns `false` for any input. The tests pass it as the
/// keyword-presence argument of `classify_gated_sink`.
fn no_kw_present(_keyword: &str) -> bool {
    let present = false;
    present
}
/// Returns a copy of the first entry in `matches` whose label is a
/// `DataLabel::Sink` sharing at least one capability bit with `caps`, or
/// `None` when no entry qualifies.
fn find_match_with_caps(matches: &[GateMatch], caps: Cap) -> Option<GateMatch> {
    for candidate in matches {
        if let DataLabel::Sink(sink_caps) = candidate.label {
            if sink_caps.intersects(caps) {
                return Some(*candidate);
            }
        }
    }
    None
}
/// Checks that JavaScript `setAttribute` with the attribute name `href`
/// produces a single `HTML_ESCAPE` gate match with payload argument 1.
#[test]
fn gated_sink_dangerous_exact() {
    let expected = GateMatch {
        label: DataLabel::Sink(Cap::HTML_ESCAPE),
        payload_args: &[1],
        object_destination_fields: &[],
    };
    let matches = classify_gated_sink(
        "javascript",
        "setAttribute",
        |_| Some(String::from("href")),
        no_kw,
        no_kw_present,
    );
    assert_eq!(matches.as_slice(), &[expected]);
}
/// Checks that JavaScript `setAttribute` with the attribute name `onclick`
/// produces a single `HTML_ESCAPE` gate match with payload argument 1.
#[test]
fn gated_sink_dangerous_prefix() {
    let expected = GateMatch {
        label: DataLabel::Sink(Cap::HTML_ESCAPE),
        payload_args: &[1],
        object_destination_fields: &[],
    };
    let matches = classify_gated_sink(
        "javascript",
        "setAttribute",
        |_| Some(String::from("onclick")),
        no_kw,
        no_kw_present,
    );
    assert_eq!(matches.as_slice(), &[expected]);
}
/// Checks that JavaScript `setAttribute` with the attribute name `class`
/// produces no gate match.
#[test]
fn gated_sink_safe_suppressed() {
    let matches = classify_gated_sink(
        "javascript",
        "setAttribute",
        |_| Some(String::from("class")),
        no_kw,
        no_kw_present,
    );
    assert!(matches.is_empty());
}
/// Checks that when no argument value is available for JavaScript
/// `setAttribute`, a single `HTML_ESCAPE` match covering `ALL_ARGS_PAYLOAD`
/// is produced.
#[test]
fn gated_sink_dynamic_conservative() {
    let expected = GateMatch {
        label: DataLabel::Sink(Cap::HTML_ESCAPE),
        payload_args: ALL_ARGS_PAYLOAD,
        object_destination_fields: &[],
    };
    let matches =
        classify_gated_sink("javascript", "setAttribute", |_| None, no_kw, no_kw_present);
    assert_eq!(matches.as_slice(), &[expected]);
}
/// Checks that `setAttribute` produces no gate match when the language is
/// `rust`.
#[test]
fn gated_sink_no_match() {
    let matches = classify_gated_sink(
        "rust",
        "setAttribute",
        |_| Some(String::from("href")),
        no_kw,
        no_kw_present,
    );
    assert!(matches.is_empty());
}
/// Checks the payload argument reported by the first gate match:
/// index 1 for `setAttribute("href", ..)` and index 0 for
/// `parseFromString(.., "text/html")`.
#[test]
fn gated_sink_returns_payload_args() {
    let set_attribute = classify_gated_sink(
        "javascript",
        "setAttribute",
        |_| Some(String::from("href")),
        no_kw,
        no_kw_present,
    );
    assert_eq!(set_attribute[0].payload_args, &[1]);

    // Only argument index 1 has a known value here.
    let parse_from_string = classify_gated_sink(
        "javascript",
        "parseFromString",
        |idx| match idx {
            1 => Some(String::from("text/html")),
            _ => None,
        },
        no_kw,
        no_kw_present,
    );
    assert_eq!(parse_from_string[0].payload_args, &[0]);
}
/// Checks that JavaScript `parseFromString` with `text/xml` at argument
/// index 1 produces no gate match.
#[test]
fn gated_sink_parse_from_string_safe_mime() {
    let matches = classify_gated_sink(
        "javascript",
        "parseFromString",
        |idx| match idx {
            1 => Some(String::from("text/xml")),
            _ => None,
        },
        no_kw,
        no_kw_present,
    );
    assert!(matches.is_empty());
}
/// Checks that Python `Popen` with `shell` present and valued `True`
/// produces a single `SHELL_ESCAPE` match with payload argument 0.
#[test]
fn gated_sink_python_popen_shell_true() {
    let expected = GateMatch {
        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
        payload_args: &[0],
        object_destination_fields: &[],
    };
    let matches = classify_gated_sink(
        "python",
        "Popen",
        |_| None,
        |name| (name == "shell").then(|| String::from("True")),
        |name| name == "shell",
    );
    assert_eq!(matches.as_slice(), &[expected]);
}
/// Checks that Python `Popen` with `shell` present and valued `False`
/// produces no gate match.
#[test]
fn gated_sink_python_popen_shell_false() {
    let matches = classify_gated_sink(
        "python",
        "Popen",
        |_| None,
        |name| (name == "shell").then(|| String::from("False")),
        |name| name == "shell",
    );
    assert!(matches.is_empty());
}
/// Checks that Python `Popen` with no keyword reported as present produces a
/// single `SHELL_ESCAPE` match covering `ALL_ARGS_PAYLOAD`.
#[test]
fn gated_sink_python_popen_no_shell_conservative() {
    let expected = GateMatch {
        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
        payload_args: ALL_ARGS_PAYLOAD,
        object_destination_fields: &[],
    };
    let matches = classify_gated_sink("python", "Popen", |_| None, no_kw, no_kw_present);
    assert_eq!(matches.as_slice(), &[expected]);
}
/// Checks that Python `subprocess.run` with `shell` present and valued
/// `True` produces a single `SHELL_ESCAPE` match with payload argument 0.
#[test]
fn gated_sink_subprocess_run_shell_true() {
    let expected = GateMatch {
        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
        payload_args: &[0],
        object_destination_fields: &[],
    };
    let matches = classify_gated_sink(
        "python",
        "subprocess.run",
        |_| None,
        |name| (name == "shell").then(|| String::from("True")),
        |name| name == "shell",
    );
    assert_eq!(matches.as_slice(), &[expected]);
}
/// Checks that Python `subprocess.run` with `shell` present and valued
/// `False` produces no gate match.
#[test]
fn gated_sink_subprocess_run_shell_false() {
    let matches = classify_gated_sink(
        "python",
        "subprocess.run",
        |_| None,
        |name| (name == "shell").then(|| String::from("False")),
        |name| name == "shell",
    );
    assert!(matches.is_empty());
}
/// Checks that Python `subprocess.run` with no keyword reported as present
/// produces no gate match.
#[test]
fn gated_sink_subprocess_run_shell_absent_suppresses() {
    let matches =
        classify_gated_sink("python", "subprocess.run", |_| None, no_kw, no_kw_present);
    assert!(matches.is_empty());
}
/// Checks that Python `subprocess.run` with `shell` reported as present but
/// with no known value produces a single `SHELL_ESCAPE` match covering
/// `ALL_ARGS_PAYLOAD`.
#[test]
fn gated_sink_subprocess_run_shell_dynamic_conservative() {
    let expected = GateMatch {
        label: DataLabel::Sink(Cap::SHELL_ESCAPE),
        payload_args: ALL_ARGS_PAYLOAD,
        object_destination_fields: &[],
    };
    let matches = classify_gated_sink(
        "python",
        "subprocess.run",
        |_| None,
        // The keyword is present, but its value cannot be resolved.
        no_kw,
        |name| name == "shell",
    );
    assert_eq!(matches.as_slice(), &[expected]);
}
/// Checks that JavaScript `fetch` yields an `SSRF` gate match even when no
/// argument value is known, with payload argument 0 and `url` as the object
/// destination field.
#[test]
fn gated_sink_destination_positional_always_fires() {
    let matches = classify_gated_sink("javascript", "fetch", |_| None, no_kw, no_kw_present);
    let ssrf_gate =
        find_match_with_caps(&matches, Cap::SSRF).expect("fetch SSRF gate should fire");
    assert_eq!(ssrf_gate.label, DataLabel::Sink(Cap::SSRF));
    assert_eq!(ssrf_gate.payload_args, &[0]);
    assert_eq!(ssrf_gate.object_destination_fields, &["url"]);
}
/// Checks that JavaScript `http.request` yields an `SSRF` gate match with
/// payload argument 0 whose object destination fields include `host` or
/// `hostname`.
#[test]
fn gated_sink_destination_object_fields_surfaced() {
    let result =
        classify_gated_sink("javascript", "http.request", |_| None, no_kw, no_kw_present);
    // Select the gate by capability rather than by position, as the sibling
    // `fetch` tests do, so the test does not depend on the order in which
    // gates are emitted.
    let m = find_match_with_caps(&result, Cap::SSRF).expect("http.request gate should fire");
    assert_eq!(m.label, DataLabel::Sink(Cap::SSRF));
    assert_eq!(m.payload_args, &[0]);
    assert!(
        m.object_destination_fields
            .iter()
            .any(|&f| f == "host" || f == "hostname"),
        "expected host/hostname in destination fields, got {:?}",
        m.object_destination_fields,
    );
}
/// Checks that JavaScript `fetch` yields two gate matches: an `SSRF` match
/// (payload argument 0, destination field `url`) and a `DATA_EXFIL` match
/// (payload argument 1, destination fields including `body`).
#[test]
fn gated_sink_fetch_emits_ssrf_and_data_exfil() {
    let matches = classify_gated_sink("javascript", "fetch", |_| None, no_kw, no_kw_present);

    let ssrf_gate = find_match_with_caps(&matches, Cap::SSRF).expect("SSRF gate fires");
    assert_eq!(ssrf_gate.label, DataLabel::Sink(Cap::SSRF));
    assert_eq!(ssrf_gate.payload_args, &[0]);
    assert_eq!(ssrf_gate.object_destination_fields, &["url"]);

    let exfil_gate =
        find_match_with_caps(&matches, Cap::DATA_EXFIL).expect("DATA_EXFIL gate fires");
    assert_eq!(exfil_gate.label, DataLabel::Sink(Cap::DATA_EXFIL));
    assert_eq!(exfil_gate.payload_args, &[1]);
    let exfil_fields = exfil_gate.object_destination_fields;
    assert!(
        exfil_fields.contains(&"body"),
        "expected body in DATA_EXFIL destination fields, got {:?}",
        exfil_fields,
    );
}
/// Checks that `classify_all` returns exactly one label, the `HTML_ESCAPE`
/// sink, for JavaScript `innerHTML`.
#[test]
fn classify_all_single_label() {
    let expected = DataLabel::Sink(Cap::HTML_ESCAPE);
    let labels = classify_all("javascript", "innerHTML", None);
    assert_eq!(labels.len(), 1);
    assert_eq!(labels[0], expected);
}
/// Checks that PHP `file_get_contents` receives at least two labels,
/// including a source for all capabilities and an `SSRF` sink.
#[test]
fn classify_all_dual_label_php() {
    let source_all = DataLabel::Source(Cap::all());
    let ssrf_sink = DataLabel::Sink(Cap::SSRF);
    let labels = classify_all("php", "file_get_contents", None);

    assert!(labels.len() >= 2, "expected dual label, got {:?}", labels);
    assert!(
        labels.contains(&source_all),
        "expected Source(all), got {:?}",
        labels
    );
    assert!(
        labels.contains(&ssrf_sink),
        "expected Sink(SSRF), got {:?}",
        labels
    );
}
/// Checks that Java `readObject` receives at least two labels, including a
/// source for all capabilities and a `DESERIALIZE` sink.
#[test]
fn classify_all_dual_label_java() {
    let source_all = DataLabel::Source(Cap::all());
    let deserialize_sink = DataLabel::Sink(Cap::DESERIALIZE);
    let labels = classify_all("java", "readObject", None);

    assert!(labels.len() >= 2, "expected dual label, got {:?}", labels);
    assert!(
        labels.contains(&source_all),
        "expected Source(all), got {:?}",
        labels
    );
    assert!(
        labels.contains(&deserialize_sink),
        "expected Sink(DESERIALIZE), got {:?}",
        labels
    );
}
/// Checks that with the Echo framework detected, the Go callees `c.String`,
/// `c.HTML` and `c.JSON` classify as `HTML_ESCAPE` sinks, and that `c.String`
/// has no label under the default framework context.
#[test]
fn classify_go_echo_sinks_with_runtime_rules() {
    use crate::utils::project::{DetectedFramework, FrameworkContext};

    let echo_ctx = FrameworkContext {
        frameworks: vec![DetectedFramework::Echo],
        inspected_langs: std::collections::HashSet::new(),
    };
    let extras = go::framework_rules(&echo_ctx).to_vec();
    let html_sink = Some(DataLabel::Sink(Cap::HTML_ESCAPE));
    for callee in ["c.String", "c.HTML", "c.JSON"] {
        assert_eq!(
            classify("go", callee, Some(&extras)),
            html_sink,
            "unexpected label for {callee}",
        );
    }

    let empty = go::framework_rules(&FrameworkContext::default());
    assert_eq!(classify("go", "c.String", Some(&empty)), None);
}
/// Checks the labels produced by the Koa framework rules for JavaScript
/// (`ctx.query` and `ctx.cookies.get` as sources, `ctx.body` and
/// `ctx.redirect` as sinks), and that `ctx.query` has no label under the
/// default framework context.
#[test]
fn classify_javascript_koa_runtime_rules() {
    use crate::utils::project::{DetectedFramework, FrameworkContext};

    let koa_ctx = FrameworkContext {
        frameworks: vec![DetectedFramework::Koa],
        inspected_langs: std::collections::HashSet::new(),
    };
    let extras = javascript::framework_rules(&koa_ctx);
    let cases = [
        ("ctx.query", DataLabel::Source(Cap::all())),
        ("ctx.cookies.get", DataLabel::Source(Cap::all())),
        ("ctx.body", DataLabel::Sink(Cap::HTML_ESCAPE)),
        ("ctx.redirect", DataLabel::Sink(Cap::SSRF)),
    ];
    for (callee, label) in cases {
        assert_eq!(
            classify("javascript", callee, Some(&extras)),
            Some(label),
            "unexpected label for {callee}",
        );
    }

    let empty = javascript::framework_rules(&FrameworkContext::default());
    assert_eq!(classify("javascript", "ctx.query", Some(&empty)), None);
}
/// Checks the labels produced by the Fastify framework rules for TypeScript
/// (`request.query` as a source, `reply.send` and `reply.redirect` as sinks),
/// and that `request.query` has no label under the default framework context.
#[test]
fn classify_typescript_fastify_runtime_rules() {
    use crate::utils::project::{DetectedFramework, FrameworkContext};

    let fastify_ctx = FrameworkContext {
        frameworks: vec![DetectedFramework::Fastify],
        inspected_langs: std::collections::HashSet::new(),
    };
    let extras = typescript::framework_rules(&fastify_ctx);
    let cases = [
        ("request.query", DataLabel::Source(Cap::all())),
        ("reply.send", DataLabel::Sink(Cap::HTML_ESCAPE)),
        ("reply.redirect", DataLabel::Sink(Cap::SSRF)),
    ];
    for (callee, label) in cases {
        assert_eq!(
            classify("typescript", callee, Some(&extras)),
            Some(label),
            "unexpected label for {callee}",
        );
    }

    let empty = typescript::framework_rules(&FrameworkContext::default());
    assert_eq!(classify("typescript", "request.query", Some(&empty)), None);
}
/// Checks that with the Sinatra framework detected, Ruby `erb` and `haml`
/// classify as `HTML_ESCAPE` sinks, and that `erb` has no label under the
/// default framework context.
#[test]
fn classify_ruby_sinatra_template_sinks() {
    use crate::utils::project::{DetectedFramework, FrameworkContext};

    let sinatra_ctx = FrameworkContext {
        frameworks: vec![DetectedFramework::Sinatra],
        inspected_langs: std::collections::HashSet::new(),
    };
    let extras = ruby::framework_rules(&sinatra_ctx).to_vec();
    let html_sink = Some(DataLabel::Sink(Cap::HTML_ESCAPE));
    for callee in ["erb", "haml"] {
        assert_eq!(
            classify("ruby", callee, Some(&extras)),
            html_sink,
            "unexpected label for {callee}",
        );
    }

    let empty = ruby::framework_rules(&FrameworkContext::default());
    assert_eq!(classify("ruby", "erb", Some(&empty)), None);
}
/// Checks the labels produced by the Axum framework rules for Rust (two
/// source patterns and two sink patterns), and that `Html(name)` has no label
/// under the default framework context.
#[test]
fn classify_rust_axum_runtime_rules() {
    use crate::utils::project::{DetectedFramework, FrameworkContext};

    let axum_ctx = FrameworkContext {
        frameworks: vec![DetectedFramework::Axum],
        inspected_langs: std::collections::HashSet::new(),
    };
    let extras = rust::framework_rules(&axum_ctx);
    let cases = [
        ("Path<String>", DataLabel::Source(Cap::all())),
        ("HeaderMap.get(\"x-user\")", DataLabel::Source(Cap::all())),
        ("Html(name)", DataLabel::Sink(Cap::HTML_ESCAPE)),
        ("Redirect::to(next)", DataLabel::Sink(Cap::SSRF)),
    ];
    for (callee, label) in cases {
        assert_eq!(
            classify("rust", callee, Some(&extras)),
            Some(label),
            "unexpected label for {callee}",
        );
    }

    let empty = rust::framework_rules(&FrameworkContext::default());
    assert_eq!(classify("rust", "Html(name)", Some(&empty)), None);
}
/// Checks the labels produced by the Actix Web framework rules for Rust:
/// `web::Json<String>` and `HttpRequest.match_info()` as sources, and
/// `HttpResponse.body(payload)` as an `HTML_ESCAPE` sink.
#[test]
fn classify_rust_actix_runtime_rules() {
    use crate::utils::project::{DetectedFramework, FrameworkContext};

    let actix_ctx = FrameworkContext {
        frameworks: vec![DetectedFramework::ActixWeb],
        inspected_langs: std::collections::HashSet::new(),
    };
    let extras = rust::framework_rules(&actix_ctx);
    let cases = [
        ("web::Json<String>", DataLabel::Source(Cap::all())),
        ("HttpRequest.match_info()", DataLabel::Source(Cap::all())),
        ("HttpResponse.body(payload)", DataLabel::Sink(Cap::HTML_ESCAPE)),
    ];
    for (callee, label) in cases {
        assert_eq!(
            classify("rust", callee, Some(&extras)),
            Some(label),
            "unexpected label for {callee}",
        );
    }
}
/// Checks the labels produced by the Rocket framework rules for Rust:
/// `CookieJar.get_private("sid")` as a source, `content::RawHtml(name)` as an
/// `HTML_ESCAPE` sink, and `Redirect::to(next)` as an `SSRF` sink.
#[test]
fn classify_rust_rocket_runtime_rules() {
    use crate::utils::project::{DetectedFramework, FrameworkContext};

    let rocket_ctx = FrameworkContext {
        frameworks: vec![DetectedFramework::Rocket],
        inspected_langs: std::collections::HashSet::new(),
    };
    let extras = rust::framework_rules(&rocket_ctx);
    let cases = [
        ("CookieJar.get_private(\"sid\")", DataLabel::Source(Cap::all())),
        ("content::RawHtml(name)", DataLabel::Sink(Cap::HTML_ESCAPE)),
        ("Redirect::to(next)", DataLabel::Sink(Cap::SSRF)),
    ];
    for (callee, label) in cases {
        assert_eq!(
            classify("rust", callee, Some(&extras)),
            Some(label),
            "unexpected label for {callee}",
        );
    }
}
}