#![allow(clippy::collapsible_if)]
/// Coarse category assigned to the text of a branch condition by
/// `classify_condition`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PredicateKind {
/// Null/none test, e.g. `is_none`, `== null`, `!= nil`, `is None`.
NullCheck,
/// Emptiness test, e.g. `is_empty`, `.len() == 0`, `.length === 0`, `== ""`.
EmptyCheck,
/// Error-state test, e.g. `is_err`, `is_ok`, `err != nil`.
ErrorCheck,
/// Call whose callee name contains `valid`/`check`/`verify` (or starts with
/// `is_safe`/`is_authorized`/`is_authenticated`), or a regex/pattern
/// `test`/`match`/`matcher(..).matches()` call.
ValidationCall,
/// Call whose callee name contains `sanitiz`, `escape` or `encode`.
SanitizerCall,
/// Membership test such as `.includes(`, `.contains(`, `in_array(`, ` in `,
/// or a bare index lookup like `allowed[cmd]`.
AllowlistCheck,
/// Type test such as `typeof `, `isinstance(`, ` instanceof `, `.matches(`,
/// `is_numeric(`, `ctype_`, `.is_a?(`.
TypeCheck,
/// Test for the presence of a single shell metacharacter (see
/// `SHELL_METACHARS`) via a contains-style call, a quoted-literal ` in `
/// test, or a regex character class.
ShellMetaValidated,
/// Length compared with `>`, `>=`, `<` or `<=` against a literal of at least 2.
BoundedLength,
/// Generic comparison (`==`, `!=`, `>=`, `<=`, ` > `, ` < `) that matched no
/// more specific category.
Comparison,
/// Empty text, or nothing above matched.
Unknown,
}
/// Single-character strings treated as shell metacharacters. A needle must
/// equal one of these entries exactly to count in `is_shell_metachar_rejection`;
/// `is_metachar_regex_class` matches individual class characters against them.
const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r", "\0"];
fn is_shell_metachar_rejection(text: &str) -> bool {
for method in [".contains(", ".includes(", ".include?("] {
if let Some(idx) = text.find(method) {
let args_start = idx + method.len();
if let Some(needle) = extract_first_string_arg(&text[args_start..]) {
if SHELL_METACHARS.contains(&needle.as_str()) {
return true;
}
}
}
}
if let Some(needle) = extract_python_in_needle(text) {
if SHELL_METACHARS.contains(&needle.as_str()) {
return true;
}
}
if is_metachar_regex_class(text) {
return true;
}
false
}
/// Decodes the string literal that starts `after_open` (the text right after
/// a call's opening parenthesis).
///
/// Leading ASCII whitespace is skipped. The literal must be delimited by `"`
/// or `'`. The escapes `\n`, `\r`, `\t` and `\0` are decoded; any other
/// escaped byte is taken literally. Returns `None` when the text does not
/// start with a quote, the literal is unterminated, or the decoded bytes are
/// not valid UTF-8.
fn extract_first_string_arg(after_open: &str) -> Option<String> {
    let literal = after_open.trim_start_matches(|c: char| c.is_ascii_whitespace());
    let mut stream = literal.bytes();
    let quote = stream.next().filter(|b| *b == b'"' || *b == b'\'')?;

    let mut decoded = Vec::new();
    while let Some(byte) = stream.next() {
        if byte == b'\\' {
            // A backslash consumes the following byte; a trailing backslash
            // means the literal never closes.
            let escaped = stream.next()?;
            decoded.push(match escaped {
                b'n' => b'\n',
                b'r' => b'\r',
                b't' => b'\t',
                b'0' => b'\0',
                other => other,
            });
        } else if byte == quote {
            return String::from_utf8(decoded).ok();
        } else {
            decoded.push(byte);
        }
    }
    None
}
/// Extracts the quoted needle from a `"<lit>" in <haystack>` style test.
///
/// The text before the first ` in ` (after trimming and dropping one leading
/// `!`) must be a single literal that starts and ends with the same `"` or
/// `'`. The literal's contents are returned with `\n`, `\r`, `\t` and `\0`
/// decoded and any other escaped character taken literally, matching how
/// `extract_first_string_arg` decodes call arguments. Without this decoding
/// `'\n' in cmd` could never match the newline entry in `SHELL_METACHARS`.
///
/// Returns `None` when there is no ` in `, or the left side is not a quoted
/// literal.
fn extract_python_in_needle(text: &str) -> Option<String> {
    let pos = text.find(" in ")?;
    let left = text[..pos].trim();
    let left = left.strip_prefix('!').unwrap_or(left).trim();

    let mut chars = left.chars();
    let quote = chars.next().filter(|c| *c == '"' || *c == '\'')?;
    // `next_back` yields `None` for a lone quote, so this also enforces the
    // minimum length of two characters.
    if chars.next_back() != Some(quote) {
        return None;
    }

    let mut needle = String::with_capacity(left.len());
    while let Some(c) = chars.next() {
        if c != '\\' {
            needle.push(c);
            continue;
        }
        match chars.next() {
            Some('n') => needle.push('\n'),
            Some('r') => needle.push('\r'),
            Some('t') => needle.push('\t'),
            Some('0') => needle.push('\0'),
            Some(other) => needle.push(other),
            // A lone trailing backslash is kept as-is.
            None => needle.push('\\'),
        }
    }
    Some(needle)
}
/// Reports whether `text` contains a `[...]` group that is non-empty and
/// whose every character is the first character of a `SHELL_METACHARS` entry.
///
/// Groups are scanned left to right: each `[` is paired with the next `]`
/// after it, and scanning resumes after that `]`. An unmatched `[` ends the
/// search.
fn is_metachar_regex_class(text: &str) -> bool {
    let is_metachar = |c: char| SHELL_METACHARS.iter().any(|meta| meta.starts_with(c));

    let mut remaining = text;
    loop {
        let Some((_, after_open)) = remaining.split_once('[') else {
            return false;
        };
        let Some((class, after_close)) = after_open.split_once(']') else {
            return false;
        };
        if !class.is_empty() && class.chars().all(is_metachar) {
            return true;
        }
        remaining = after_close;
    }
}
/// Reports whether `lower` (already lowercased condition text) compares a
/// `.len()` / `.length` / `.length()` value against an integer literal of at
/// least 2 using `>=`, `<=`, `>` or `<`.
///
/// Bounds of 0 or 1 are rejected, so `x.len() > 0` is not a bounded-length
/// check.
fn is_bounded_length_check(lower: &str) -> bool {
    /// Checks the text following one length probe.
    fn bound_follows(after_probe: &str) -> bool {
        let rest = after_probe.trim_start();
        // Allows `.length()` as well as `.length`.
        let rest = rest.strip_prefix("()").unwrap_or(rest).trim_start();
        // Two-character operators come first so `>=` is not read as `>`;
        // only the first operator that matches is considered.
        [">=", "<=", ">", "<"]
            .iter()
            .find_map(|op| rest.strip_prefix(*op))
            .and_then(|tail| parse_leading_uint(tail.trim_start()))
            .is_some_and(|bound| bound >= 2)
    }

    [".len()", ".length"].iter().any(|probe| {
        lower
            .match_indices(*probe)
            .any(|(at, hit)| bound_follows(&lower[at + hit.len()..]))
    })
}
/// Parses the run of ASCII decimal digits at the start of `s`.
///
/// Returns `None` when `s` does not start with a digit or the value does not
/// fit in a `u64`.
fn parse_leading_uint(s: &str) -> Option<u64> {
    let mut digits = s.chars().map_while(|c| c.to_digit(10)).peekable();
    // At least one digit is required.
    digits.peek()?;
    digits.try_fold(0u64, |value, digit| {
        value.checked_mul(10)?.checked_add(u64::from(digit))
    })
}
/// Classifies the text of a condition into a `PredicateKind`.
///
/// Matching is done by substring on an ASCII-lowercased copy of `text`
/// (shell-metacharacter detection uses the original text). Categories are
/// tried in a fixed order and the first hit wins: error, null, empty,
/// shell-metachar, allowlist, regex matcher chain, type check, bounded
/// length, validation/sanitizer call, generic comparison. Empty text or no
/// match gives `PredicateKind::Unknown`.
pub fn classify_condition(text: &str) -> PredicateKind {
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }
    fn names_regex(receiver: &str) -> bool {
        receiver.contains("regex") || receiver.contains("pattern")
    }

    const ERROR_MARKERS: &[&str] = &[
        "is_err",
        "is_ok",
        "err != nil",
        "err == nil",
        "error != nil",
        "error == nil",
    ];
    const NULL_MARKERS: &[&str] = &[
        "is_none",
        "is_some",
        "== none",
        "!= none",
        "is none",
        "is not none",
        "== null",
        "!= null",
        "=== null",
        "!== null",
        "== nil",
        "!= nil",
    ];
    const EMPTY_MARKERS: &[&str] = &[
        "is_empty",
        ".len() == 0",
        ".len() != 0",
        ".length == 0",
        ".length === 0",
        ".length != 0",
        ".length !== 0",
        "== \"\"",
        "== ''",
    ];
    const ALLOWLIST_MARKERS: &[&str] = &[
        ".includes(",
        ".include?(",
        ".contains(",
        ".indexof(",
        ".has(",
        "in_array(",
        " in ",
    ];
    const TYPE_MARKERS: &[&str] = &[
        "typeof ",
        "isinstance(",
        " instanceof ",
        ".matches(",
        "is_numeric(",
        "is_int(",
        "is_string(",
        "is_float(",
        "ctype_",
        ".is_a?(",
        ".kind_of?(",
    ];
    const ALL_PREDICATE_MARKERS: &[&str] = &["is_ascii_", "is_alphanumeric", "is_numeric("];
    const COMPARISON_MARKERS: &[&str] = &["==", "!=", ">=", "<=", " > ", " < "];

    if text.is_empty() {
        return PredicateKind::Unknown;
    }
    let lower = text.to_ascii_lowercase();

    // Error checks come before null checks so `err != nil` is not a NullCheck.
    if contains_any(&lower, ERROR_MARKERS) {
        return PredicateKind::ErrorCheck;
    }
    if contains_any(&lower, NULL_MARKERS) {
        return PredicateKind::NullCheck;
    }
    if contains_any(&lower, EMPTY_MARKERS) {
        return PredicateKind::EmptyCheck;
    }

    // Metachar rejection must be tried before the allowlist markers because
    // both are triggered by `.contains(` / `.includes(` / ` in `.
    if is_shell_metachar_rejection(text) {
        return PredicateKind::ShellMetaValidated;
    }

    // A bracket with no call at all is treated as a map/index lookup.
    let bare_index_lookup = lower.contains('[') && !lower.contains('(');
    if contains_any(&lower, ALLOWLIST_MARKERS) || bare_index_lookup {
        return PredicateKind::AllowlistCheck;
    }

    // `<regex/pattern>.matcher(x).matches()` is validation, not a type check,
    // so it is handled before the `.matches(` type marker.
    if let Some(matcher_pos) = lower.find(".matcher(") {
        if lower[matcher_pos..].contains(".matches(") && names_regex(&lower[..matcher_pos]) {
            return PredicateKind::ValidationCall;
        }
    }

    let all_with_char_predicate =
        lower.contains(".all(") && contains_any(&lower, ALL_PREDICATE_MARKERS);
    if contains_any(&lower, TYPE_MARKERS) || all_with_char_predicate {
        return PredicateKind::TypeCheck;
    }

    if is_bounded_length_check(&lower) {
        return PredicateKind::BoundedLength;
    }

    if lower.contains('(') {
        // Drop wrapping parentheses and negation before looking at the callee.
        let call = lower.trim_start_matches(['(', '!', ' ', '\t']);
        let call = call.strip_prefix("not ").unwrap_or(call).trim();
        let callee_part = match call.find('(') {
            Some(paren) => &call[..paren],
            None => call,
        };
        // Last path segment of the callee: `a.b::c` -> `c`.
        let bare = callee_part
            .rsplit(['.', ':'])
            .next()
            .unwrap_or(callee_part)
            .trim();

        let validation_name = ["valid", "check", "verify"]
            .iter()
            .any(|word| bare.contains(*word))
            || ["is_safe", "is_authorized", "is_authenticated"]
                .iter()
                .any(|prefix| bare.starts_with(*prefix));
        if validation_name {
            return PredicateKind::ValidationCall;
        }

        // `test`/`match`/`matches` only count when the receiver names a regex.
        if matches!(bare, "test" | "match" | "matches") {
            if let Some(dot_pos) = callee_part.rfind('.') {
                if names_regex(&callee_part[..dot_pos]) {
                    return PredicateKind::ValidationCall;
                }
            }
        }
        if bare == "matches" {
            if let Some(matcher_pos) = lower.find(".matcher(") {
                if names_regex(&lower[..matcher_pos]) {
                    return PredicateKind::ValidationCall;
                }
            }
        }

        if ["sanitiz", "escape", "encode"]
            .iter()
            .any(|word| bare.contains(*word))
        {
            return PredicateKind::SanitizerCall;
        }
    }

    if contains_any(&lower, COMPARISON_MARKERS) {
        PredicateKind::Comparison
    } else {
        PredicateKind::Unknown
    }
}
/// Classifies `text` and, where the category supports it, also extracts the
/// name of the value being tested.
///
/// For validation/sanitizer calls with no identifiable target and more than
/// one argument, the result is downgraded to `(PredicateKind::Unknown, None)`.
/// Categories without a target extractor always return `None` as the target.
pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<String>) {
    let kind = classify_condition(text);
    let target = match kind {
        PredicateKind::ValidationCall | PredicateKind::SanitizerCall => {
            let target = extract_validation_target(text);
            let multi_arg = matches!(count_call_args(text), Some(n) if n > 1);
            if target.is_none() && multi_arg {
                return (PredicateKind::Unknown, None);
            }
            target
        }
        PredicateKind::ShellMetaValidated => extract_validation_target(text),
        PredicateKind::AllowlistCheck => extract_allowlist_target(text),
        PredicateKind::TypeCheck => extract_type_check_target(text),
        PredicateKind::Comparison => extract_comparison_target(text),
        PredicateKind::NullCheck
        | PredicateKind::EmptyCheck
        | PredicateKind::ErrorCheck
        | PredicateKind::BoundedLength
        | PredicateKind::Unknown => None,
    };
    (kind, target)
}
/// Picks the identifier side of an equality comparison against a literal.
///
/// The operators `===`, `!==`, `==`, `!=` are tried in that order, and the
/// text is split at the first occurrence of the first operator present. A
/// target is returned only when exactly one side is a literal and the other
/// side is an identifier; otherwise the result is `None`.
fn extract_comparison_target(text: &str) -> Option<String> {
    let expr = text.trim();
    let (lhs, rhs) = ["===", "!==", "==", "!="]
        .iter()
        .find_map(|op| expr.split_once(*op))?;
    let (lhs, rhs) = (lhs.trim(), rhs.trim());

    let lhs_literal = is_comparison_literal(lhs);
    let rhs_literal = is_comparison_literal(rhs);

    if !lhs_literal && rhs_literal && is_identifier(lhs) {
        Some(lhs.to_string())
    } else if lhs_literal && !rhs_literal && is_identifier(rhs) {
        Some(rhs.to_string())
    } else {
        None
    }
}
/// Reports whether `s` (after trimming) looks like a literal value.
///
/// Accepted forms: text wrapped in matching `"`, `'` or backtick quotes; one
/// of `null`, `undefined`, `nil`, `None`, `true`, `false`; or an optionally
/// signed run of ASCII digits, `.` and `_` that starts with a digit.
fn is_comparison_literal(s: &str) -> bool {
    let candidate = s.trim();
    if candidate.is_empty() {
        return false;
    }

    // Quoted literal: needs at least an opening and a closing quote.
    if let [open, .., close] = candidate.as_bytes() {
        if open == close && matches!(*open, b'"' | b'\'' | b'`') {
            return true;
        }
    }

    if matches!(
        candidate,
        "null" | "undefined" | "nil" | "None" | "true" | "false"
    ) {
        return true;
    }

    // Numeric literal: one optional sign, then a digit-led body.
    let number = candidate.strip_prefix(['-', '+']).unwrap_or(candidate);
    number.starts_with(|c: char| c.is_ascii_digit())
        && number
            .chars()
            .all(|c| c.is_ascii_digit() || c == '.' || c == '_')
}
/// Counts the top-level arguments of the first call in `text`.
///
/// Leading wrappers (`(`, `!`, spaces, tabs, then an optional `not `) are
/// dropped first, using the same normalisation as `extract_validation_target`,
/// so `(!f(a, b))` is counted as the call `f(a, b)`. Scanning starts after the
/// first `(` and stops at its matching close bracket, so anything after the
/// call (e.g. `f(x) && g(a, b)`) does not affect the count. Commas inside
/// nested brackets or inside `"`/`'` string literals are not separators.
///
/// Returns `None` when there is no `(`, `Some(0)` for an empty argument list,
/// and otherwise the number of comma-separated arguments. If the call is
/// never closed, everything after the `(` is treated as the argument list.
fn count_call_args(text: &str) -> Option<usize> {
    let trimmed = text.trim().trim_start_matches(['(', '!', ' ', '\t']);
    let trimmed = trimmed.strip_prefix("not ").unwrap_or(trimmed).trim();
    let paren_pos = trimmed.find('(')?;
    let bytes = &trimmed.as_bytes()[paren_pos + 1..];

    // Depth 1 is "directly inside the call's own parentheses".
    let mut depth: usize = 1;
    let mut commas: usize = 0;
    let mut saw_token = false;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            quote @ (b'"' | b'\'') => {
                saw_token = true;
                // Skip to the closing quote, honouring backslash escapes.
                i += 1;
                while i < bytes.len() && bytes[i] != quote {
                    i += if bytes[i] == b'\\' { 2 } else { 1 };
                }
            }
            b'(' | b'[' | b'{' => {
                saw_token = true;
                depth += 1;
            }
            b')' | b']' | b'}' => {
                depth -= 1;
                if depth == 0 {
                    // Matching close of the call: stop before trailing text.
                    break;
                }
            }
            b',' if depth == 1 => commas += 1,
            other if !other.is_ascii_whitespace() => saw_token = true,
            _ => {}
        }
        i += 1;
    }

    if commas == 0 && !saw_token {
        Some(0)
    } else {
        Some(commas + 1)
    }
}
/// Returns the first argument of a call, given the text that follows the
/// call's opening parenthesis.
///
/// The argument ends at the first comma directly inside the call, or at the
/// call's matching close bracket when there is no such comma. Brackets are
/// depth-tracked and the contents of `"`/`'` literals (with backslash
/// escapes) are skipped. The result is trimmed and may be empty. Returns
/// `None` when the call is never closed.
fn first_call_arg(args_part: &str) -> Option<&str> {
    let mut nesting: usize = 1;
    let mut comma_at: Option<usize> = None;
    let mut close_at: Option<usize> = None;
    // `Some(q)` while inside a literal opened by quote byte `q`.
    let mut open_quote: Option<u8> = None;
    let mut skip_next = false;

    for (idx, &byte) in args_part.as_bytes().iter().enumerate() {
        if let Some(quote) = open_quote {
            if skip_next {
                skip_next = false;
            } else if byte == b'\\' {
                skip_next = true;
            } else if byte == quote {
                open_quote = None;
            }
            continue;
        }
        match byte {
            b'"' | b'\'' => open_quote = Some(byte),
            b'(' | b'[' | b'{' => nesting += 1,
            b')' | b']' | b'}' => {
                nesting -= 1;
                if nesting == 0 {
                    close_at = Some(idx);
                    break;
                }
            }
            b',' if nesting == 1 && comma_at.is_none() => comma_at = Some(idx),
            _ => {}
        }
    }

    let close_at = close_at?;
    Some(args_part[..comma_at.unwrap_or(close_at)].trim())
}
fn extract_validation_target(text: &str) -> Option<String> {
let trimmed = text.trim();
let trimmed = trimmed.trim_start_matches(['(', '!', ' ', '\t']);
let trimmed = trimmed.strip_prefix("not ").unwrap_or(trimmed).trim();
if trimmed.to_ascii_lowercase().contains(".matches(")
&& let Some(matcher_pos) = trimmed.find(".matcher(")
{
let receiver_lower = trimmed[..matcher_pos].to_ascii_lowercase();
if receiver_lower.contains("regex") || receiver_lower.contains("pattern") {
let args_start = matcher_pos + ".matcher(".len();
if let Some(first_arg) = first_call_arg(&trimmed[args_start..]) {
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
}
}
let paren_pos = trimmed.find('(')?;
let callee_part = &trimmed[..paren_pos];
let args_part = &trimmed[paren_pos + 1..];
if let Some(dot_pos) = callee_part.rfind('.') {
let receiver = callee_part[..dot_pos].trim();
let method = callee_part[dot_pos + 1..].trim().to_ascii_lowercase();
if matches!(method.as_str(), "test" | "match" | "matches")
&& let Some(first_arg) = first_call_arg(args_part)
{
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
if !receiver.is_empty() && is_identifier(receiver) {
return Some(receiver.to_string());
}
}
let first_arg = first_call_arg(args_part)?;
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
if !first_arg.is_empty() && is_identifier(first_arg) {
Some(first_arg.to_string())
} else {
None
}
}
/// Finds the name of the value tested by an allowlist/membership check.
///
/// Candidates are tried in this order:
/// 1. the first argument of `.includes(`, `.include?(`, `.contains(`,
///    `.indexof(`, `.has(` or `in_array(` (matched case-insensitively);
/// 2. the left-hand side of ` not in ` / ` in `, minus one leading `!`;
/// 3. the contents of the first `[...]`.
///
/// A leading `$` is stripped from each candidate, which must then be an
/// identifier. Returns `None` when nothing qualifies.
fn extract_allowlist_target(text: &str) -> Option<String> {
    /// Accepts a candidate when, minus a leading `$`, it is an identifier.
    fn bare_ident(candidate: &str) -> Option<String> {
        let candidate = candidate.strip_prefix('$').unwrap_or(candidate);
        (!candidate.is_empty() && is_identifier(candidate)).then(|| candidate.to_string())
    }

    const MEMBERSHIP_CALLS: [&str; 6] = [
        ".includes(",
        ".include?(",
        ".contains(",
        ".indexof(",
        ".has(",
        "in_array(",
    ];

    let expr = text.trim();
    // ASCII lowercasing keeps byte offsets aligned with `expr`.
    let lower = expr.to_ascii_lowercase();

    for call in MEMBERSHIP_CALLS {
        let found = lower
            .find(call)
            .and_then(|at| first_call_arg(&expr[at + call.len()..]))
            .and_then(bare_ident);
        if found.is_some() {
            return found;
        }
    }

    // ` not in ` is checked first so `not` is not left on the subject.
    if let Some(cut) = lower.find(" not in ").or_else(|| lower.find(" in ")) {
        let subject = expr[..cut].trim();
        let subject = subject.strip_prefix('!').unwrap_or(subject).trim();
        if let Some(found) = bare_ident(subject) {
            return Some(found);
        }
    }

    if let (Some(open), Some(close)) = (expr.find('['), expr.find(']')) {
        if close > open + 1 {
            return bare_ident(expr[open + 1..close].trim());
        }
    }
    None
}
fn extract_type_check_target(text: &str) -> Option<String> {
let trimmed = text.trim();
let lower = trimmed.to_ascii_lowercase();
if let Some(pos) = lower.find("typeof ") {
let after = &trimmed[pos + "typeof ".len()..];
let target: String = after
.chars()
.take_while(|c| c.is_alphanumeric() || *c == '_')
.collect();
if !target.is_empty() {
return Some(target);
}
}
if let Some(pos) = lower.find("isinstance(") {
let args_start = pos + "isinstance(".len();
let args_part = &trimmed[args_start..];
let inner = args_part.strip_suffix(')').unwrap_or(args_part);
let first_arg = inner.split(',').next()?.trim();
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
if let Some(pos) = lower.find(" instanceof ") {
let var_part = trimmed[..pos].trim();
if !var_part.is_empty() && is_identifier(var_part) {
return Some(var_part.to_string());
}
}
if let Some(pos) = lower.find(".matches(") {
let receiver = trimmed[..pos].trim();
let receiver = receiver.strip_prefix('!').unwrap_or(receiver).trim();
if !receiver.is_empty() && is_identifier(receiver) {
return Some(receiver.to_string());
}
}
for func in &["is_numeric(", "is_int(", "is_string(", "is_float("] {
if let Some(pos) = lower.find(func) {
let args_start = pos + func.len();
let args_part = &trimmed[args_start..];
let inner = args_part.strip_suffix(')').unwrap_or(args_part);
let first_arg = inner.split(',').next()?.trim();
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
}
for method in &[".is_a?(", ".kind_of?("] {
if let Some(pos) = lower.find(method) {
let receiver = trimmed[..pos].trim();
let receiver = receiver.strip_prefix('!').unwrap_or(receiver).trim();
if !receiver.is_empty() && is_identifier(receiver) {
return Some(receiver.to_string());
}
}
}
if let Some(pos) = lower.find("ctype_") {
if let Some(paren_pos) = trimmed[pos..].find('(') {
let args_start = pos + paren_pos + 1;
let args_part = &trimmed[args_start..];
let inner = args_part.strip_suffix(')').unwrap_or(args_part);
let first_arg = inner.split(',').next()?.trim();
let first_arg = first_arg.strip_prefix('$').unwrap_or(first_arg);
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
}
None
}
/// Reports whether `s` is a (possibly dotted) identifier: non-empty, made up
/// only of alphanumerics, `_` and `.`, and not starting with an ASCII digit.
fn is_identifier(s: &str) -> bool {
    fn is_part(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '.'
    }

    let mut chars = s.chars();
    match chars.next() {
        None => false,
        Some(first) if first.is_ascii_digit() => false,
        Some(first) => is_part(first) && chars.all(is_part),
    }
}
// Unit tests for condition classification and target extraction.
#[cfg(test)]
mod tests {
use super::*;
// --- classify_condition: basic categories ---
#[test]
fn classify_empty_is_unknown() {
assert_eq!(classify_condition(""), PredicateKind::Unknown);
}
#[test]
fn classify_null_checks() {
assert_eq!(classify_condition("x.is_none()"), PredicateKind::NullCheck);
assert_eq!(classify_condition("x == null"), PredicateKind::NullCheck);
assert_eq!(classify_condition("x != nil"), PredicateKind::NullCheck);
assert_eq!(classify_condition("x is None"), PredicateKind::NullCheck);
assert_eq!(classify_condition("x === null"), PredicateKind::NullCheck);
}
#[test]
fn classify_error_checks() {
assert_eq!(classify_condition("x.is_err()"), PredicateKind::ErrorCheck);
assert_eq!(classify_condition("err != nil"), PredicateKind::ErrorCheck);
assert_eq!(classify_condition("x.is_ok()"), PredicateKind::ErrorCheck);
}
#[test]
fn classify_empty_checks() {
assert_eq!(
classify_condition("x.is_empty()"),
PredicateKind::EmptyCheck
);
assert_eq!(
classify_condition("x.len() == 0"),
PredicateKind::EmptyCheck
);
assert_eq!(
classify_condition("x.length === 0"),
PredicateKind::EmptyCheck
);
}
#[test]
fn classify_validation_call() {
assert_eq!(
classify_condition("validate(x)"),
PredicateKind::ValidationCall
);
assert_eq!(
classify_condition("is_safe(input)"),
PredicateKind::ValidationCall
);
assert_eq!(
classify_condition("check_auth(req)"),
PredicateKind::ValidationCall
);
assert_eq!(
classify_condition("input.verify(sig)"),
PredicateKind::ValidationCall
);
}
// A validation-like name without a call must not count as a validation call.
#[test]
fn classify_validation_requires_paren() {
assert_eq!(
classify_condition("x_valid == true"),
PredicateKind::Comparison
);
assert_eq!(
classify_condition("is_valid && ready"),
PredicateKind::Unknown
);
}
#[test]
fn classify_sanitizer_call() {
assert_eq!(
classify_condition("sanitize(x)"),
PredicateKind::SanitizerCall
);
assert_eq!(
classify_condition("html_escape(s)"),
PredicateKind::SanitizerCall
);
assert_eq!(
classify_condition("url_encode(path)"),
PredicateKind::SanitizerCall
);
}
#[test]
fn classify_comparison() {
assert_eq!(classify_condition("x == 5"), PredicateKind::Comparison);
assert_eq!(classify_condition("x != y"), PredicateKind::Comparison);
assert_eq!(classify_condition("a >= b"), PredicateKind::Comparison);
}
#[test]
fn classify_unknown_fallback() {
assert_eq!(classify_condition("flag"), PredicateKind::Unknown);
assert_eq!(classify_condition("a && b"), PredicateKind::Unknown);
}
// --- classify_condition_with_target: validation / sanitizer targets ---
#[test]
fn target_function_call_first_arg() {
let (kind, target) = classify_condition_with_target("validate(x, config)");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("x"));
}
#[test]
fn target_method_call_receiver() {
let (kind, target) = classify_condition_with_target("x.isValid()");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("x"));
}
#[test]
fn target_sanitizer_first_arg() {
let (kind, target) = classify_condition_with_target("sanitize(input)");
assert_eq!(kind, PredicateKind::SanitizerCall);
assert_eq!(target.as_deref(), Some("input"));
}
#[test]
fn target_negated_validation() {
let (kind, target) = classify_condition_with_target("!validate(&x)");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("x"));
}
// For regex-style `.test(...)` the target is the argument, not the receiver.
#[test]
fn target_regex_test_first_arg() {
let (kind, target) = classify_condition_with_target("!SAFE_STRING_REGEX.test(value)");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("value"));
}
#[test]
fn target_regex_test_pattern_receiver() {
let (kind, target) = classify_condition_with_target("ALLOWED_PATTERN.test(s)");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("s"));
}
#[test]
fn target_test_non_regex_receiver_is_not_validation() {
let kind = classify_condition("obj.test(value)");
assert_eq!(kind, PredicateKind::Unknown);
}
// --- comparison targets: only an identifier compared against a literal ---
#[test]
fn target_comparison_extracts_identifier_side() {
let (kind, target) = classify_condition_with_target("x == 5");
assert_eq!(kind, PredicateKind::Comparison);
assert_eq!(target.as_deref(), Some("x"));
}
#[test]
fn target_comparison_strict_equality_with_string() {
let (kind, target) = classify_condition_with_target("x === '/login'");
assert_eq!(kind, PredicateKind::Comparison);
assert_eq!(target.as_deref(), Some("x"));
}
#[test]
fn target_comparison_literal_on_left() {
let (kind, target) = classify_condition_with_target("null != obj");
assert_eq!(kind, PredicateKind::Comparison);
assert_eq!(target.as_deref(), Some("obj"));
}
#[test]
fn target_comparison_both_identifiers_returns_none() {
let (kind, target) = classify_condition_with_target("x === y");
assert_eq!(kind, PredicateKind::Comparison);
assert_eq!(target, None);
}
#[test]
fn target_comparison_both_literals_returns_none() {
let (kind, target) = classify_condition_with_target("'a' == 'b'");
assert_eq!(kind, PredicateKind::Comparison);
assert_eq!(target, None);
}
#[test]
fn target_check_auth_first_arg() {
let (kind, target) = classify_condition_with_target("check_auth(req)");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("req"));
}
#[test]
fn target_method_with_args() {
let (kind, target) = classify_condition_with_target("input.verify(sig)");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("input"));
}
// With no identifiable target, a multi-argument call is downgraded to Unknown
// while a single-argument call keeps its kind.
#[test]
fn target_multi_arg_fallback_opaque_expr_is_unknown() {
let (kind, target) = classify_condition_with_target("validate(x + 1, y)");
assert_eq!(kind, PredicateKind::Unknown);
assert_eq!(target, None);
}
#[test]
fn target_single_arg_fallback_preserves_kind() {
let (kind, target) = classify_condition_with_target("validate(x + 1)");
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target, None);
}
#[test]
fn count_call_args_basic() {
assert_eq!(super::count_call_args("f(a, b, c)"), Some(3));
assert_eq!(super::count_call_args("f(a)"), Some(1));
assert_eq!(super::count_call_args("f()"), Some(0));
assert_eq!(super::count_call_args("f(g(x, y), z)"), Some(2));
assert_eq!(super::count_call_args("not_a_call"), None);
}
// --- allowlist classification ---
#[test]
fn classify_allowlist_includes() {
assert_eq!(
classify_condition("ALLOWED.includes(cmd)"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn classify_allowlist_in_array() {
assert_eq!(
classify_condition("in_array($cmd, $allowed)"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn classify_allowlist_python_not_in() {
assert_eq!(
classify_condition("cmd not in ALLOWED"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn classify_allowlist_python_in() {
assert_eq!(
classify_condition("cmd in ALLOWED"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn classify_allowlist_map_lookup() {
assert_eq!(
classify_condition("allowed[cmd]"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn classify_allowlist_contains() {
assert_eq!(
classify_condition("whitelist.contains(value)"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn classify_allowlist_has() {
assert_eq!(
classify_condition("allowedSet.has(key)"),
PredicateKind::AllowlistCheck
);
}
// Wrapping parentheses and negation must not hide the allowlist target.
#[test]
fn extract_allowlist_target_negated_paren_wrapper() {
let (kind, target) = classify_condition_with_target("(!ALLOWED.includes(cmd))");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
#[test]
fn extract_allowlist_target_java_contains_paren_wrapper() {
let (kind, target) = classify_condition_with_target("(!ALLOWED.contains(cmd))");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
#[test]
fn extract_allowlist_target_in_array_paren_wrapper() {
let (kind, target) = classify_condition_with_target("(!in_array($cmd, $allowed))");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
// --- type-check classification ---
#[test]
fn classify_type_check_typeof() {
assert_eq!(
classify_condition("typeof input !== 'number'"),
PredicateKind::TypeCheck
);
}
#[test]
fn classify_type_check_isinstance() {
assert_eq!(
classify_condition("isinstance(user_id, int)"),
PredicateKind::TypeCheck
);
}
#[test]
fn classify_type_check_matches() {
assert_eq!(
classify_condition("input.matches(\"\\\\d+\")"),
PredicateKind::TypeCheck
);
}
#[test]
fn classify_type_check_is_numeric() {
assert_eq!(
classify_condition("is_numeric($id)"),
PredicateKind::TypeCheck
);
}
#[test]
fn classify_type_check_is_int() {
assert_eq!(classify_condition("is_int($x)"), PredicateKind::TypeCheck);
}
#[test]
fn classify_type_check_ctype() {
assert_eq!(
classify_condition("ctype_digit($x)"),
PredicateKind::TypeCheck
);
}
// --- allowlist targets ---
#[test]
fn target_allowlist_includes() {
let (kind, target) = classify_condition_with_target("ALLOWED.includes(cmd)");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
#[test]
fn target_allowlist_in_array() {
let (kind, target) = classify_condition_with_target("in_array($cmd, $allowed)");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
#[test]
fn target_allowlist_python_in() {
let (kind, target) = classify_condition_with_target("cmd in ALLOWED");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
#[test]
fn target_allowlist_python_not_in() {
let (kind, target) = classify_condition_with_target("cmd not in ALLOWED");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
#[test]
fn target_allowlist_map_lookup() {
let (kind, target) = classify_condition_with_target("allowed[cmd]");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
// --- type-check targets ---
#[test]
fn target_type_check_typeof() {
let (kind, target) = classify_condition_with_target("typeof input !== 'number'");
assert_eq!(kind, PredicateKind::TypeCheck);
assert_eq!(target.as_deref(), Some("input"));
}
#[test]
fn target_type_check_isinstance() {
let (kind, target) = classify_condition_with_target("isinstance(user_id, int)");
assert_eq!(kind, PredicateKind::TypeCheck);
assert_eq!(target.as_deref(), Some("user_id"));
}
#[test]
fn target_type_check_matches() {
let (kind, target) = classify_condition_with_target("input.matches(\"\\\\d+\")");
assert_eq!(kind, PredicateKind::TypeCheck);
assert_eq!(target.as_deref(), Some("input"));
}
#[test]
fn target_type_check_is_numeric() {
let (kind, target) = classify_condition_with_target("is_numeric($id)");
assert_eq!(kind, PredicateKind::TypeCheck);
assert_eq!(target.as_deref(), Some("id"));
}
#[test]
fn target_type_check_ctype() {
let (kind, target) = classify_condition_with_target("ctype_digit($x)");
assert_eq!(kind, PredicateKind::TypeCheck);
assert_eq!(target.as_deref(), Some("x"));
}
// --- Ruby-style `?` methods ---
#[test]
fn classify_type_check_is_a() {
assert_eq!(
classify_condition("user_id.is_a?(Integer)"),
PredicateKind::TypeCheck
);
}
#[test]
fn target_type_check_is_a() {
let (kind, target) = classify_condition_with_target("user_id.is_a?(Integer)");
assert_eq!(kind, PredicateKind::TypeCheck);
assert_eq!(target.as_deref(), Some("user_id"));
}
#[test]
fn classify_allowlist_include_question() {
assert_eq!(
classify_condition("ALLOWED.include?(cmd)"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn target_allowlist_include_question() {
let (kind, target) = classify_condition_with_target("ALLOWED.include?(cmd)");
assert_eq!(kind, PredicateKind::AllowlistCheck);
assert_eq!(target.as_deref(), Some("cmd"));
}
// --- `instanceof` ---
#[test]
fn classify_instanceof_is_type_check() {
assert_eq!(
classify_condition("x instanceof String"),
PredicateKind::TypeCheck
);
}
#[test]
fn target_instanceof_x_string() {
let (kind, target) = classify_condition_with_target("x instanceof String");
assert_eq!(kind, PredicateKind::TypeCheck);
assert_eq!(target.as_deref(), Some("x"));
}
#[test]
fn target_instanceof_obj_integer() {
let (kind, target) = classify_condition_with_target("obj instanceof Integer");
assert_eq!(kind, PredicateKind::TypeCheck);
assert_eq!(target.as_deref(), Some("obj"));
}
// --- shell-metacharacter rejection ---
#[test]
fn classify_shell_metachar_contains_rust() {
assert_eq!(
classify_condition("input.contains(\";\")"),
PredicateKind::ShellMetaValidated
);
assert_eq!(
classify_condition("cmd.contains(\"|\")"),
PredicateKind::ShellMetaValidated
);
assert_eq!(
classify_condition("s.contains(\"&\")"),
PredicateKind::ShellMetaValidated
);
assert_eq!(
classify_condition("s.contains(\"`\")"),
PredicateKind::ShellMetaValidated
);
assert_eq!(
classify_condition("s.contains(\"$\")"),
PredicateKind::ShellMetaValidated
);
}
#[test]
fn classify_shell_metachar_includes_js() {
assert_eq!(
classify_condition("input.includes(';')"),
PredicateKind::ShellMetaValidated
);
assert_eq!(
classify_condition("cmd.includes(\"|\")"),
PredicateKind::ShellMetaValidated
);
}
#[test]
fn classify_shell_metachar_include_question_ruby() {
assert_eq!(
classify_condition("cmd.include?(\";\")"),
PredicateKind::ShellMetaValidated
);
}
#[test]
fn classify_shell_metachar_python_in() {
assert_eq!(
classify_condition("\";\" in cmd"),
PredicateKind::ShellMetaValidated
);
assert_eq!(
classify_condition("'|' in cmd"),
PredicateKind::ShellMetaValidated
);
}
#[test]
fn classify_shell_metachar_regex_class() {
assert_eq!(
classify_condition("cmd.match(/[;|&]/)"),
PredicateKind::ShellMetaValidated
);
assert_eq!(
classify_condition("re.search(\"[;|&]\", cmd)"),
PredicateKind::ShellMetaValidated
);
}
// Needles that are not a single metacharacter stay allowlist checks.
#[test]
fn classify_non_metachar_contains_stays_allowlist() {
assert_eq!(
classify_condition("input.contains(\"foo\")"),
PredicateKind::AllowlistCheck
);
assert_eq!(
classify_condition("path.contains(\"..\")"),
PredicateKind::AllowlistCheck
);
assert_eq!(
classify_condition("name.contains(\"admin\")"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn classify_allowlist_membership_unaffected() {
assert_eq!(
classify_condition("cmd in ALLOWED"),
PredicateKind::AllowlistCheck
);
assert_eq!(
classify_condition("cmd not in ALLOWED"),
PredicateKind::AllowlistCheck
);
}
#[test]
fn target_shell_metachar_receiver() {
let (kind, target) = classify_condition_with_target("input.contains(\";\")");
assert_eq!(kind, PredicateKind::ShellMetaValidated);
assert_eq!(target.as_deref(), Some("input"));
}
// --- bounded-length checks ---
#[test]
fn classify_bounded_length_rust_len() {
assert_eq!(
classify_condition("input.len() > 100"),
PredicateKind::BoundedLength
);
assert_eq!(
classify_condition("s.len() >= 256"),
PredicateKind::BoundedLength
);
assert_eq!(
classify_condition("s.len() < 4096"),
PredicateKind::BoundedLength
);
}
#[test]
fn classify_bounded_length_js_length() {
assert_eq!(
classify_condition("input.length > 100"),
PredicateKind::BoundedLength
);
}
// Bounds of 0 or 1 fall through to a plain comparison.
#[test]
fn classify_non_empty_len_stays_comparison() {
assert_eq!(
classify_condition("input.len() > 0"),
PredicateKind::Comparison
);
assert_eq!(
classify_condition("s.len() >= 1"),
PredicateKind::Comparison
);
}
// --- direct tests of the private helpers ---
#[test]
fn shell_metachar_rejection_detects_common_chars() {
for m in &[";", "|", "&", "`", "$", ">", "<"] {
let text = format!("x.contains(\"{m}\")");
assert!(
is_shell_metachar_rejection(&text),
"should detect metachar {m:?} in {text:?}"
);
}
}
#[test]
fn shell_metachar_rejection_rejects_non_metachar() {
assert!(!is_shell_metachar_rejection("x.contains(\"foo\")"));
assert!(!is_shell_metachar_rejection("x.contains(\"admin\")"));
assert!(!is_shell_metachar_rejection("x.contains(\"..\")"));
}
// The source-level escape `\n` must be decoded to a newline before matching.
#[test]
fn shell_metachar_rejection_handles_escapes() {
assert!(is_shell_metachar_rejection("x.contains(\"\\n\")"));
}
#[test]
fn bounded_length_rejects_zero_and_one() {
assert!(!is_bounded_length_check("x.len() > 0"));
assert!(!is_bounded_length_check("x.len() >= 1"));
assert!(!is_bounded_length_check("x.len() < 1"));
}
#[test]
fn bounded_length_accepts_small_bounds() {
assert!(is_bounded_length_check("x.len() > 2"));
assert!(is_bounded_length_check("x.len() <= 256"));
}
}
// Tests for Java-style `PATTERN.matcher(x).matches()` chains.
// NOTE(review): the module name suggests these are regression tests for
// advisory GHSA-h8cj-hpmg-636v — confirm.
#[cfg(test)]
mod ghsa_h8cj_hpmg_636v_tests {
use super::*;
// The chain is a validation call when the receiver name contains "pattern".
#[test]
fn java_pattern_matcher_chain_classifies_as_validation() {
let kind =
classify_condition("FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()");
assert_eq!(
kind,
PredicateKind::ValidationCall,
"matcher().matches() chain on PATTERN-named receiver should be ValidationCall"
);
}
// The validated value is the argument passed to `.matcher(`, not the receiver.
#[test]
fn java_pattern_matcher_chain_target_is_matcher_arg() {
let (kind, target) = classify_condition_with_target(
"FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()",
);
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("tableName"));
}
// A leading `!` must not change the extracted target.
#[test]
fn java_negated_pattern_matcher_chain_target_is_matcher_arg() {
let (kind, target) = classify_condition_with_target(
"!FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()",
);
assert_eq!(kind, PredicateKind::ValidationCall);
assert_eq!(target.as_deref(), Some("tableName"));
}
// Without "regex"/"pattern" in the receiver the chain is not validation.
#[test]
fn java_pattern_matcher_chain_non_pattern_receiver_is_not_validation() {
let kind = classify_condition("obj.matcher(x).matches()");
assert!(
kind != PredicateKind::ValidationCall,
"no regex marker should not trigger validation"
);
}
}