use crate::labels::{Cap, bare_method_name};
use crate::symbol::Lang;
use super::value::SymbolicValue;
/// String operation recognised by the per-language classifiers in this module.
#[derive(Clone, Debug, PartialEq)]
pub enum StringMethod {
/// Whitespace stripping (`trim`, `strip`, `strings.TrimSpace`, ...); one-sided
/// variants such as `lstrip` or `trimEnd` are folded into this variant as well.
Trim,
/// Lower-casing.
ToLower,
/// Upper-casing.
ToUpper,
/// Substring replacement; only produced when both the pattern and the replacement
/// arguments are concrete strings.
Replace {
/// Literal text being searched for.
pattern: String,
/// Literal text substituted for a match.
replacement: String,
},
/// Substring extraction; only produced when the inspected index argument is a
/// concrete integer.
Substr,
/// Length query.
StrLen,
}
/// Where the string being operated on is found at a call site.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum StringOperandSource {
/// The operand is the method receiver (e.g. `s.trim()`).
Receiver,
/// The operand is the first call argument (e.g. `strlen(s)`).
FirstArg,
}
/// Outcome of [`classify_string_method`]: which operation a call performs and where
/// its string operand comes from.
#[derive(Clone, Debug)]
pub struct StringMethodInfo {
/// The recognised string operation.
pub method: StringMethod,
/// Location of the string the operation is applied to.
pub operand_source: StringOperandSource,
}
/// Description of a replace call that was recognised as a sanitizer by
/// [`detect_replace_sanitizer`].
#[derive(Clone, Debug)]
pub struct SanitizerInfo {
/// Capabilities the replaced pattern is associated with.
pub sanitized_caps: Cap,
/// Result of `is_global_replace` for the call, i.e. whether the call is treated as
/// substituting every occurrence.
pub is_global: bool,
}
/// Encoding, escaping and decoding transforms recognised by
/// [`classify_transform_method`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TransformKind {
/// HTML entity escaping.
HtmlEscape,
/// URL percent-encoding.
UrlEncode,
/// Shell argument quoting.
ShellEscape,
/// SQL string escaping.
SqlEscape,
/// Base64 encoding.
Base64Encode,
/// Base64 decoding.
Base64Decode,
/// URL percent-decoding.
UrlDecode,
}
impl TransformKind {
    /// Returns the camelCase label used when this transform is shown to the user.
    pub fn display_name(self) -> &'static str {
        match self {
            Self::HtmlEscape => "htmlEscape",
            Self::UrlEncode => "urlEncode",
            Self::ShellEscape => "shellEscape",
            Self::SqlEscape => "sqlEscape",
            Self::Base64Encode => "base64Encode",
            Self::Base64Decode => "base64Decode",
            Self::UrlDecode => "urlDecode",
        }
    }

    /// Returns the capability this transform is credited with.
    ///
    /// Only HTML escaping, URL encoding and shell escaping carry a capability; SQL
    /// escaping and the Base64 / URL-decode transforms yield the empty set.
    pub fn verified_cap(self) -> Cap {
        match self {
            Self::HtmlEscape => Cap::HTML_ESCAPE,
            Self::UrlEncode => Cap::URL_ENCODE,
            Self::ShellEscape => Cap::SHELL_ESCAPE,
            Self::SqlEscape | Self::Base64Encode | Self::Base64Decode | Self::UrlDecode => {
                Cap::empty()
            }
        }
    }

    /// Reports whether [`Self::verified_cap`] is non-empty for this transform.
    pub fn is_protective(self) -> bool {
        let cap = self.verified_cap();
        !cap.is_empty()
    }
}
/// Outcome of [`classify_transform_method`]: which transform a call applies and where
/// its input string comes from.
#[derive(Clone, Debug)]
pub struct TransformMethodInfo {
/// The recognised transform.
pub kind: TransformKind,
/// Location of the string the transform is applied to.
pub operand_source: StringOperandSource,
}
/// Classifies `callee` as one of the known [`StringMethod`]s for `lang`.
///
/// The bare method name is derived once with `bare_method_name` and handed to the
/// language-specific classifier. The Python, Go and PHP classifiers additionally
/// receive the full `callee` because they match some calls on the whole callee text.
///
/// Returns `None` when the call is not a recognised string operation.
pub fn classify_string_method(
    callee: &str,
    args: &[SymbolicValue],
    lang: Lang,
) -> Option<StringMethodInfo> {
    let bare = bare_method_name(callee);
    match lang {
        Lang::C | Lang::Cpp => classify_c(bare),
        Lang::Rust => classify_rust(bare, args),
        Lang::Php => classify_php(bare, callee, args),
        Lang::Go => classify_go(bare, callee, args),
        Lang::Java => classify_java(bare, args),
        Lang::Ruby => classify_ruby(bare, args),
        Lang::Python => classify_python(bare, callee, args),
        Lang::JavaScript | Lang::TypeScript => classify_js(bare, args),
    }
}
fn classify_js(method: &str, args: &[SymbolicValue]) -> Option<StringMethodInfo> {
use StringMethod::*;
use StringOperandSource::*;
match method {
"trim" | "trimStart" | "trimEnd" => Some(StringMethodInfo {
method: Trim,
operand_source: Receiver,
}),
"toLowerCase" => Some(StringMethodInfo {
method: ToLower,
operand_source: Receiver,
}),
"toUpperCase" => Some(StringMethodInfo {
method: ToUpper,
operand_source: Receiver,
}),
"replace" | "replaceAll" => {
let (pat, rep) = extract_replace_args(args, 1)?;
Some(StringMethodInfo {
method: Replace {
pattern: pat,
replacement: rep,
},
operand_source: Receiver,
})
}
"substring" | "substr" | "slice" => {
if has_concrete_index(args, 1) {
Some(StringMethodInfo {
method: Substr,
operand_source: Receiver,
})
} else {
None
}
}
_ => None,
}
}
fn classify_python(method: &str, callee: &str, args: &[SymbolicValue]) -> Option<StringMethodInfo> {
use StringMethod::*;
use StringOperandSource::*;
if callee == "len" {
return Some(StringMethodInfo {
method: StrLen,
operand_source: FirstArg,
});
}
match method {
"strip" | "lstrip" | "rstrip" => Some(StringMethodInfo {
method: Trim,
operand_source: Receiver,
}),
"lower" => Some(StringMethodInfo {
method: ToLower,
operand_source: Receiver,
}),
"upper" => Some(StringMethodInfo {
method: ToUpper,
operand_source: Receiver,
}),
"replace" => {
let (pat, rep) = extract_replace_args(args, 1)?;
Some(StringMethodInfo {
method: Replace {
pattern: pat,
replacement: rep,
},
operand_source: Receiver,
})
}
_ => None,
}
}
fn classify_ruby(method: &str, args: &[SymbolicValue]) -> Option<StringMethodInfo> {
use StringMethod::*;
use StringOperandSource::*;
match method {
"strip" | "lstrip" | "rstrip" => Some(StringMethodInfo {
method: Trim,
operand_source: Receiver,
}),
"downcase" => Some(StringMethodInfo {
method: ToLower,
operand_source: Receiver,
}),
"upcase" => Some(StringMethodInfo {
method: ToUpper,
operand_source: Receiver,
}),
"gsub" | "sub" => {
let (pat, rep) = extract_replace_args(args, 1)?;
Some(StringMethodInfo {
method: Replace {
pattern: pat,
replacement: rep,
},
operand_source: Receiver,
})
}
"length" | "size" => Some(StringMethodInfo {
method: StrLen,
operand_source: Receiver,
}),
_ => None,
}
}
fn classify_java(method: &str, args: &[SymbolicValue]) -> Option<StringMethodInfo> {
use StringMethod::*;
use StringOperandSource::*;
match method {
"trim" => Some(StringMethodInfo {
method: Trim,
operand_source: Receiver,
}),
"toLowerCase" => Some(StringMethodInfo {
method: ToLower,
operand_source: Receiver,
}),
"toUpperCase" => Some(StringMethodInfo {
method: ToUpper,
operand_source: Receiver,
}),
"replace" | "replaceAll" => {
let (pat, rep) = extract_replace_args(args, 1)?;
Some(StringMethodInfo {
method: Replace {
pattern: pat,
replacement: rep,
},
operand_source: Receiver,
})
}
"substring" => {
if has_concrete_index(args, 1) {
Some(StringMethodInfo {
method: Substr,
operand_source: Receiver,
})
} else {
None
}
}
"length" => Some(StringMethodInfo {
method: StrLen,
operand_source: Receiver,
}),
_ => None,
}
}
fn classify_go(method: &str, callee: &str, args: &[SymbolicValue]) -> Option<StringMethodInfo> {
use StringMethod::*;
use StringOperandSource::*;
match callee {
"strings.TrimSpace" => Some(StringMethodInfo {
method: Trim,
operand_source: FirstArg,
}),
"strings.ToLower" => Some(StringMethodInfo {
method: ToLower,
operand_source: FirstArg,
}),
"strings.ToUpper" => Some(StringMethodInfo {
method: ToUpper,
operand_source: FirstArg,
}),
"strings.Replace" | "strings.ReplaceAll" => {
let (pat, rep) = extract_replace_args(args, 1)?;
Some(StringMethodInfo {
method: Replace {
pattern: pat,
replacement: rep,
},
operand_source: FirstArg,
})
}
_ => {
if method == "len" {
Some(StringMethodInfo {
method: StrLen,
operand_source: FirstArg,
})
} else {
None
}
}
}
}
fn classify_php(method: &str, callee: &str, args: &[SymbolicValue]) -> Option<StringMethodInfo> {
use StringMethod::*;
use StringOperandSource::*;
match callee {
"trim" | "ltrim" | "rtrim" => Some(StringMethodInfo {
method: Trim,
operand_source: FirstArg,
}),
"strtolower" => Some(StringMethodInfo {
method: ToLower,
operand_source: FirstArg,
}),
"strtoupper" => Some(StringMethodInfo {
method: ToUpper,
operand_source: FirstArg,
}),
"str_replace" => {
let (pat, rep) = extract_replace_args(args, 0)?;
Some(StringMethodInfo {
method: Replace {
pattern: pat,
replacement: rep,
},
operand_source: FirstArg,
})
}
"strlen" => Some(StringMethodInfo {
method: StrLen,
operand_source: FirstArg,
}),
"substr" => {
if has_concrete_index(args, 1) {
Some(StringMethodInfo {
method: Substr,
operand_source: FirstArg,
})
} else {
None
}
}
_ => {
match method {
"trim" => Some(StringMethodInfo {
method: Trim,
operand_source: Receiver,
}),
_ => None,
}
}
}
}
fn classify_rust(method: &str, _args: &[SymbolicValue]) -> Option<StringMethodInfo> {
use StringMethod::*;
use StringOperandSource::*;
match method {
"trim" | "trim_start" | "trim_end" => Some(StringMethodInfo {
method: Trim,
operand_source: Receiver,
}),
"to_lowercase" => Some(StringMethodInfo {
method: ToLower,
operand_source: Receiver,
}),
"to_uppercase" => Some(StringMethodInfo {
method: ToUpper,
operand_source: Receiver,
}),
"len" => Some(StringMethodInfo {
method: StrLen,
operand_source: Receiver,
}),
_ => None,
}
}
fn classify_c(method: &str) -> Option<StringMethodInfo> {
use StringMethod::*;
use StringOperandSource::*;
match method {
"tolower" => Some(StringMethodInfo {
method: ToLower,
operand_source: FirstArg,
}),
"toupper" => Some(StringMethodInfo {
method: ToUpper,
operand_source: FirstArg,
}),
"strlen" => Some(StringMethodInfo {
method: StrLen,
operand_source: FirstArg,
}),
_ => None,
}
}
pub fn classify_transform_method(callee: &str, lang: Lang) -> Option<TransformMethodInfo> {
match lang {
Lang::JavaScript | Lang::TypeScript => classify_transform_js(callee),
Lang::Python => classify_transform_python(callee),
Lang::Php => classify_transform_php(callee),
Lang::Java => classify_transform_java(callee),
Lang::Go => classify_transform_go(callee),
Lang::Ruby => classify_transform_ruby(callee),
_ => None,
}
}
fn classify_transform_js(callee: &str) -> Option<TransformMethodInfo> {
use StringOperandSource::*;
use TransformKind::*;
let method = bare_method_name(callee);
match method {
"encodeURIComponent" | "encodeURI" => Some(TransformMethodInfo {
kind: UrlEncode,
operand_source: FirstArg,
}),
"decodeURIComponent" | "decodeURI" => Some(TransformMethodInfo {
kind: UrlDecode,
operand_source: FirstArg,
}),
"btoa" => Some(TransformMethodInfo {
kind: Base64Encode,
operand_source: FirstArg,
}),
"atob" => Some(TransformMethodInfo {
kind: Base64Decode,
operand_source: FirstArg,
}),
"encode" | "escape" if callee.starts_with("he.") => Some(TransformMethodInfo {
kind: HtmlEscape,
operand_source: FirstArg,
}),
_ => None,
}
}
fn classify_transform_python(callee: &str) -> Option<TransformMethodInfo> {
use StringOperandSource::*;
use TransformKind::*;
match callee {
"html.escape" | "cgi.escape" => Some(TransformMethodInfo {
kind: HtmlEscape,
operand_source: FirstArg,
}),
"urllib.parse.quote" | "urllib.parse.quote_plus" => Some(TransformMethodInfo {
kind: UrlEncode,
operand_source: FirstArg,
}),
"urllib.parse.unquote" => Some(TransformMethodInfo {
kind: UrlDecode,
operand_source: FirstArg,
}),
"shlex.quote" => Some(TransformMethodInfo {
kind: ShellEscape,
operand_source: FirstArg,
}),
"base64.b64encode" => Some(TransformMethodInfo {
kind: Base64Encode,
operand_source: FirstArg,
}),
"base64.b64decode" => Some(TransformMethodInfo {
kind: Base64Decode,
operand_source: FirstArg,
}),
_ => None,
}
}
fn classify_transform_php(callee: &str) -> Option<TransformMethodInfo> {
use StringOperandSource::*;
use TransformKind::*;
match callee {
"htmlspecialchars" | "htmlentities" => Some(TransformMethodInfo {
kind: HtmlEscape,
operand_source: FirstArg,
}),
"urlencode" | "rawurlencode" => Some(TransformMethodInfo {
kind: UrlEncode,
operand_source: FirstArg,
}),
"urldecode" | "rawurldecode" => Some(TransformMethodInfo {
kind: UrlDecode,
operand_source: FirstArg,
}),
"base64_encode" => Some(TransformMethodInfo {
kind: Base64Encode,
operand_source: FirstArg,
}),
"base64_decode" => Some(TransformMethodInfo {
kind: Base64Decode,
operand_source: FirstArg,
}),
"escapeshellarg" | "escapeshellcmd" => Some(TransformMethodInfo {
kind: ShellEscape,
operand_source: FirstArg,
}),
"addslashes" => Some(TransformMethodInfo {
kind: SqlEscape,
operand_source: FirstArg,
}),
_ => None,
}
}
fn classify_transform_java(callee: &str) -> Option<TransformMethodInfo> {
use StringOperandSource::*;
use TransformKind::*;
let method = bare_method_name(callee);
if callee.ends_with("URLEncoder.encode") {
return Some(TransformMethodInfo {
kind: UrlEncode,
operand_source: FirstArg,
});
}
if callee.ends_with("URLDecoder.decode") {
return Some(TransformMethodInfo {
kind: UrlDecode,
operand_source: FirstArg,
});
}
if callee.ends_with("StringEscapeUtils.escapeHtml4")
|| callee.ends_with("StringEscapeUtils.escapeHtml")
|| callee.ends_with("StringEscapeUtils.escapeXml11")
|| callee.ends_with("StringEscapeUtils.escapeXml10")
|| callee.ends_with("StringEscapeUtils.escapeXml")
{
return Some(TransformMethodInfo {
kind: HtmlEscape,
operand_source: FirstArg,
});
}
match method {
"encodeToString" => Some(TransformMethodInfo {
kind: Base64Encode,
operand_source: FirstArg,
}),
"decode" if callee.contains("Base64") => Some(TransformMethodInfo {
kind: Base64Decode,
operand_source: FirstArg,
}),
_ => None,
}
}
fn classify_transform_go(callee: &str) -> Option<TransformMethodInfo> {
use StringOperandSource::*;
use TransformKind::*;
match callee {
"url.QueryEscape" | "url.PathEscape" => Some(TransformMethodInfo {
kind: UrlEncode,
operand_source: FirstArg,
}),
"url.QueryUnescape" | "url.PathUnescape" => Some(TransformMethodInfo {
kind: UrlDecode,
operand_source: FirstArg,
}),
"html.EscapeString" => Some(TransformMethodInfo {
kind: HtmlEscape,
operand_source: FirstArg,
}),
"base64.StdEncoding.EncodeToString"
| "base64.URLEncoding.EncodeToString"
| "base64.RawStdEncoding.EncodeToString"
| "base64.RawURLEncoding.EncodeToString" => Some(TransformMethodInfo {
kind: Base64Encode,
operand_source: FirstArg,
}),
"base64.StdEncoding.DecodeString"
| "base64.URLEncoding.DecodeString"
| "base64.RawStdEncoding.DecodeString"
| "base64.RawURLEncoding.DecodeString" => Some(TransformMethodInfo {
kind: Base64Decode,
operand_source: FirstArg,
}),
_ => None,
}
}
fn classify_transform_ruby(callee: &str) -> Option<TransformMethodInfo> {
use StringOperandSource::*;
use TransformKind::*;
let normalised = callee.replace("::", ".");
match normalised.as_str() {
"CGI.escape" | "URI.encode_www_form_component" => Some(TransformMethodInfo {
kind: UrlEncode,
operand_source: FirstArg,
}),
"CGI.unescape" | "URI.decode_www_form_component" => Some(TransformMethodInfo {
kind: UrlDecode,
operand_source: FirstArg,
}),
"ERB::Util.html_escape" | "ERB.Util.html_escape" | "CGI.escapeHTML" => {
Some(TransformMethodInfo {
kind: HtmlEscape,
operand_source: FirstArg,
})
}
"Base64.strict_encode64" | "Base64.encode64" | "Base64.urlsafe_encode64" => {
Some(TransformMethodInfo {
kind: Base64Encode,
operand_source: FirstArg,
})
}
"Base64.strict_decode64" | "Base64.decode64" | "Base64.urlsafe_decode64" => {
Some(TransformMethodInfo {
kind: Base64Decode,
operand_source: FirstArg,
})
}
_ => None,
}
}
/// Applies the encoding / escaping transform `kind` to the concrete string `input`.
///
/// Returns `None` for the decoding kinds, and whenever URL encoding declines the input.
pub fn encode_concrete_for_witness(kind: TransformKind, input: &str) -> Option<String> {
    let encoded = match kind {
        TransformKind::Base64Decode | TransformKind::UrlDecode => return None,
        // URL encoding is the only encoder that can itself refuse an input.
        TransformKind::UrlEncode => return url_encode_witness(input),
        TransformKind::HtmlEscape => html_escape_witness(input),
        TransformKind::ShellEscape => shell_escape_witness(input),
        TransformKind::SqlEscape => sql_escape_witness(input),
        TransformKind::Base64Encode => base64_encode_witness(input),
    };
    Some(encoded)
}
/// Applies the decoding transform `kind` to the concrete string `input`.
///
/// Returns `None` for every non-decoding kind and when the input cannot be decoded.
pub fn decode_concrete_for_witness(kind: TransformKind, input: &str) -> Option<String> {
    match kind {
        TransformKind::UrlDecode => url_decode_witness(input),
        TransformKind::Base64Decode => base64_decode_witness(input),
        TransformKind::HtmlEscape
        | TransformKind::UrlEncode
        | TransformKind::ShellEscape
        | TransformKind::SqlEscape
        | TransformKind::Base64Encode => None,
    }
}
/// HTML-escapes `input` by replacing the five markup-significant characters with
/// character references: `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`
/// and `'` → `&#x27;`. All other characters are copied through unchanged.
fn html_escape_witness(input: &str) -> String {
    // Reserve a little headroom: each escaped character expands to 4–6 bytes.
    let mut out = String::with_capacity(input.len() + input.len() / 4);
    for ch in input.chars() {
        match ch {
            // `&` is handled like any other character here; since the input is walked
            // exactly once, entities emitted below are never re-escaped.
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#x27;"),
            other => out.push(other),
        }
    }
    out
}
/// Percent-encodes `input`, leaving only ASCII letters, digits and `-`, `_`, `.`, `~`
/// untouched; every other byte becomes `%XX` with uppercase hex digits.
///
/// Returns `None` when `input` contains any non-ASCII character.
fn url_encode_witness(input: &str) -> Option<String> {
    if !input.is_ascii() {
        return None;
    }
    // Worst case every byte expands to three characters.
    let mut encoded = String::with_capacity(input.len() * 3);
    for byte in input.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                encoded.push(char::from(byte));
            }
            _ => {
                let high = HEX_UPPER[usize::from(byte >> 4)];
                let low = HEX_UPPER[usize::from(byte & 0x0f)];
                encoded.push('%');
                encoded.push(char::from(high));
                encoded.push(char::from(low));
            }
        }
    }
    Some(encoded)
}

/// Uppercase hexadecimal digits, indexed by nibble value.
const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";
/// Wraps `input` in single quotes for a POSIX shell, rewriting each embedded single
/// quote as `'\''` (close the quoted string, emit an escaped quote, reopen it).
fn shell_escape_witness(input: &str) -> String {
    let body = input.replace('\'', "'\\''");
    let mut quoted = String::with_capacity(body.len() + 2);
    quoted.push('\'');
    quoted.push_str(&body);
    quoted.push('\'');
    quoted
}
/// Escapes `input` for a SQL string literal by doubling every single quote.
fn sql_escape_witness(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        if ch == '\'' {
            // Emit the extra quote first; the original is pushed below.
            escaped.push('\'');
        }
        escaped.push(ch);
    }
    escaped
}
/// Encodes the UTF-8 bytes of `input` with the standard Base64 alphabet (`+`, `/`) and
/// `=` padding.
fn base64_encode_witness(input: &str) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Extracts the 6-bit group starting `shift` bits from the bottom of `group`.
    let sextet = |group: u32, shift: u32| char::from(ALPHABET[((group >> shift) & 0x3f) as usize]);

    let data = input.as_bytes();
    let mut encoded = String::with_capacity(data.len().div_ceil(3) * 4);
    for chunk in data.chunks(3) {
        // Pack up to three bytes into the top 24 bits, zero-filling missing bytes.
        let group = chunk
            .iter()
            .enumerate()
            .fold(0u32, |acc, (index, &byte)| {
                acc | (u32::from(byte) << (16 - 8 * index as u32))
            });
        encoded.push(sextet(group, 18));
        encoded.push(sextet(group, 12));
        encoded.push(if chunk.len() > 1 { sextet(group, 6) } else { '=' });
        encoded.push(if chunk.len() > 2 { sextet(group, 0) } else { '=' });
    }
    encoded
}
/// Decodes standard-alphabet Base64 `input` into a string.
///
/// Trailing `=` padding is ignored. Returns `None` when a character outside the
/// alphabet is encountered or when the decoded bytes are not valid UTF-8.
fn base64_decode_witness(input: &str) -> Option<String> {
    let payload = input.trim_end_matches('=');
    let mut decoded = Vec::with_capacity(payload.len() * 3 / 4);
    // `acc` holds the low `pending` bits that have not yet formed a whole byte.
    let mut acc: u32 = 0;
    let mut pending: u32 = 0;
    for symbol in payload.bytes() {
        let sextet = u32::from(match symbol {
            b'A'..=b'Z' => symbol - b'A',
            b'a'..=b'z' => symbol - b'a' + 26,
            b'0'..=b'9' => symbol - b'0' + 52,
            b'+' => 62,
            b'/' => 63,
            _ => return None,
        });
        acc = (acc << 6) | sextet;
        pending += 6;
        if pending >= 8 {
            pending -= 8;
            decoded.push((acc >> pending) as u8);
            // Keep only the bits that belong to the next byte.
            acc &= (1 << pending) - 1;
        }
    }
    String::from_utf8(decoded).ok()
}
/// Percent-decodes `input`, also turning `+` into a space.
///
/// Returns `None` when a `%` is not followed by two hexadecimal digits or when the
/// decoded bytes are not valid UTF-8.
fn url_decode_witness(input: &str) -> Option<String> {
    let mut decoded = Vec::with_capacity(input.len());
    let mut rest = input.as_bytes();
    while let Some((&byte, tail)) = rest.split_first() {
        rest = match byte {
            b'%' => {
                // Both digits must be present before either is interpreted.
                let (&high, after_high) = tail.split_first()?;
                let (&low, after_low) = after_high.split_first()?;
                decoded.push((hex_val(high)? << 4) | hex_val(low)?);
                after_low
            }
            b'+' => {
                decoded.push(b' ');
                tail
            }
            other => {
                decoded.push(other);
                tail
            }
        };
    }
    String::from_utf8(decoded).ok()
}
/// Returns the numeric value (0–15) of the ASCII hexadecimal digit `b`, accepting both
/// upper and lower case, or `None` if `b` is not a hexadecimal digit.
fn hex_val(b: u8) -> Option<u8> {
    // `to_digit(16)` only accepts ASCII `0-9`, `a-f`, `A-F`; the result is below 16,
    // so narrowing to `u8` is lossless.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
/// Reads the replace pattern from `args[offset]` and the replacement from
/// `args[offset + 1]`.
///
/// Returns `None` unless both arguments exist and are concrete strings.
fn extract_replace_args(args: &[SymbolicValue], offset: usize) -> Option<(String, String)> {
    let literal_at = |index: usize| {
        args.get(index)
            .and_then(|arg| arg.as_concrete_str())
            .map(|text| text.to_owned())
    };
    let pattern = literal_at(offset)?;
    let replacement = literal_at(offset + 1)?;
    Some((pattern, replacement))
}
/// Reports whether `args[offset]` exists and is a concrete integer.
fn has_concrete_index(args: &[SymbolicValue], offset: usize) -> bool {
    args.get(offset)
        .and_then(|arg| arg.as_concrete_int())
        .is_some()
}
/// Evaluates `method` on the concrete string `receiver`.
///
/// Trim, case conversion and replacement yield a `ConcreteStr`; `StrLen` yields a
/// `Concrete` integer holding `receiver.len()` (the UTF-8 byte length). `Substr` is
/// not evaluated and returns `None`.
pub fn evaluate_string_op_concrete(method: &StringMethod, receiver: &str) -> Option<SymbolicValue> {
    let folded = match method {
        // No index information is carried by the variant, so nothing can be computed.
        StringMethod::Substr => return None,
        StringMethod::StrLen => SymbolicValue::Concrete(receiver.len() as i64),
        StringMethod::Trim => SymbolicValue::ConcreteStr(receiver.trim().to_owned()),
        StringMethod::ToLower => SymbolicValue::ConcreteStr(receiver.to_lowercase()),
        StringMethod::ToUpper => SymbolicValue::ConcreteStr(receiver.to_uppercase()),
        StringMethod::Replace {
            pattern,
            replacement,
        } => {
            let replaced = receiver.replace(pattern.as_str(), replacement.as_str());
            SymbolicValue::ConcreteStr(replaced)
        }
    };
    Some(folded)
}
/// Decides whether replacing the literal `pattern` acts as a sanitizer.
///
/// The pattern is compared against fixed sets of HTML-, SQL- and shell-significant
/// tokens; each matching set contributes its capability. The replacement text is not
/// inspected. Returns `None` when no set matches; otherwise the accumulated
/// capabilities together with the `is_global_replace` verdict for `callee` / `lang`.
pub fn detect_replace_sanitizer(
    pattern: &str,
    _replacement: &str,
    callee: &str,
    lang: Lang,
) -> Option<SanitizerInfo> {
    const HTML_TAG_FRAGMENTS: [&str; 3] = ["<script", "<img", "<svg"];

    let is_global = is_global_replace(callee, lang);

    let touches_html = matches!(pattern, "<" | ">" | "\"" | "'")
        || HTML_TAG_FRAGMENTS
            .iter()
            .any(|fragment| pattern.contains(*fragment));
    let touches_sql = matches!(pattern, "'" | "\"" | "--" | ";");
    let touches_shell = matches!(pattern, "$" | "`" | "|" | ";" | "&");

    let mut sanitized_caps = Cap::empty();
    if touches_html {
        sanitized_caps |= Cap::HTML_ESCAPE;
    }
    if touches_sql {
        sanitized_caps |= Cap::SQL_QUERY;
    }
    if touches_shell {
        sanitized_caps |= Cap::SHELL_ESCAPE;
    }

    if sanitized_caps.is_empty() {
        return None;
    }
    Some(SanitizerInfo {
        sanitized_caps,
        is_global,
    })
}
/// Call-site variant of [`detect_replace_sanitizer`] working from string-literal
/// arguments.
///
/// `arg_string_literals[i]` is `Some` when argument `i` is a string literal. The
/// pattern position depends on `callee` / `lang`; the pattern must be a literal, while
/// a missing or non-literal replacement is treated as the empty string. Capabilities
/// are returned only for replace calls that are considered global.
pub fn detect_call_site_replace_sanitizer(
    callee: &str,
    lang: Lang,
    arg_string_literals: &[Option<String>],
) -> Option<Cap> {
    let literal_at = |index: usize| {
        arg_string_literals
            .get(index)
            .and_then(|slot| slot.as_deref())
    };

    let pattern_pos = pattern_arg_position(callee, lang)?;
    let pattern = literal_at(pattern_pos)?;
    let replacement = literal_at(pattern_pos + 1).unwrap_or("");

    let info = detect_replace_sanitizer(pattern, replacement, callee, lang)?;
    if info.is_global && !info.sanitized_caps.is_empty() {
        Some(info.sanitized_caps)
    } else {
        None
    }
}
/// Returns the index of the pattern argument for a recognised replace call, or `None`
/// when `callee` is not a replace call known for `lang`.
///
/// The index is 1 for Go's `strings.Replace` / `strings.ReplaceAll` and 0 for every
/// other recognised call.
fn pattern_arg_position(callee: &str, lang: Lang) -> Option<usize> {
    let method = bare_method_name(callee);
    let position = match lang {
        Lang::JavaScript | Lang::TypeScript | Lang::Java
            if matches!(method, "replace" | "replaceAll") =>
        {
            0
        }
        Lang::Python if method == "replace" || (method == "sub" && callee == "re.sub") => 0,
        Lang::Ruby if matches!(method, "gsub" | "sub") => 0,
        Lang::Go if matches!(callee, "strings.Replace" | "strings.ReplaceAll") => 1,
        Lang::Php if callee == "str_replace" => 0,
        Lang::Rust if matches!(method, "replace" | "replacen") => 0,
        _ => return None,
    };
    Some(position)
}
/// Reports whether the replace call `callee` is treated as substituting every
/// occurrence of its pattern in `lang`.
///
/// Languages without a known replace call are reported as non-global.
fn is_global_replace(callee: &str, lang: Lang) -> bool {
    let method = bare_method_name(callee);
    match lang {
        // With a string pattern, only `replaceAll` substitutes every match.
        Lang::JavaScript | Lang::TypeScript => method == "replaceAll",
        Lang::Ruby => method == "gsub",
        Lang::Go => callee == "strings.ReplaceAll",
        // `str::replacen` stops after a caller-supplied number of matches, so it must
        // not be credited as a global replacement; `str::replace` is global.
        Lang::Rust => method != "replacen",
        Lang::Python | Lang::Java | Lang::Php => true,
        _ => false,
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_classify_js_trim() {
let info = classify_string_method("input.trim", &[], Lang::JavaScript).unwrap();
assert_eq!(info.method, StringMethod::Trim);
assert_eq!(info.operand_source, StringOperandSource::Receiver);
}
#[test]
fn test_classify_js_to_lower() {
let info = classify_string_method("s.toLowerCase", &[], Lang::JavaScript).unwrap();
assert_eq!(info.method, StringMethod::ToLower);
}
#[test]
fn test_classify_js_to_upper() {
let info = classify_string_method("s.toUpperCase", &[], Lang::JavaScript).unwrap();
assert_eq!(info.method, StringMethod::ToUpper);
}
#[test]
fn test_classify_js_replace_concrete() {
let args = vec![
SymbolicValue::Symbol(crate::ssa::ir::SsaValue(0)), SymbolicValue::ConcreteStr("<".into()), SymbolicValue::ConcreteStr("<".into()), ];
let info = classify_string_method("s.replace", &args, Lang::JavaScript).unwrap();
match &info.method {
StringMethod::Replace {
pattern,
replacement,
} => {
assert_eq!(pattern, "<");
assert_eq!(replacement, "<");
}
other => panic!("expected Replace, got {:?}", other),
}
}
#[test]
fn test_classify_js_replace_dynamic_pattern() {
let args = vec![
SymbolicValue::Symbol(crate::ssa::ir::SsaValue(0)), SymbolicValue::Symbol(crate::ssa::ir::SsaValue(1)), SymbolicValue::ConcreteStr("".into()), ];
assert!(classify_string_method("s.replace", &args, Lang::JavaScript).is_none());
}
#[test]
fn test_classify_js_substring_concrete_index() {
let args = vec![
SymbolicValue::Symbol(crate::ssa::ir::SsaValue(0)), SymbolicValue::Concrete(0), ];
let info = classify_string_method("s.substring", &args, Lang::JavaScript).unwrap();
assert_eq!(info.method, StringMethod::Substr);
}
#[test]
fn test_classify_js_substring_dynamic_index() {
let args = vec![
SymbolicValue::Symbol(crate::ssa::ir::SsaValue(0)), SymbolicValue::Symbol(crate::ssa::ir::SsaValue(1)), ];
assert!(classify_string_method("s.substring", &args, Lang::JavaScript).is_none());
}
#[test]
fn test_classify_python_strip() {
let info = classify_string_method("s.strip", &[], Lang::Python).unwrap();
assert_eq!(info.method, StringMethod::Trim);
assert_eq!(info.operand_source, StringOperandSource::Receiver);
}
#[test]
fn test_classify_python_lower() {
let info = classify_string_method("s.lower", &[], Lang::Python).unwrap();
assert_eq!(info.method, StringMethod::ToLower);
}
#[test]
fn test_classify_python_len() {
let info = classify_string_method("len", &[], Lang::Python).unwrap();
assert_eq!(info.method, StringMethod::StrLen);
assert_eq!(info.operand_source, StringOperandSource::FirstArg);
}
#[test]
fn test_classify_ruby_downcase() {
let info = classify_string_method("s.downcase", &[], Lang::Ruby).unwrap();
assert_eq!(info.method, StringMethod::ToLower);
}
#[test]
fn test_classify_ruby_gsub() {
let args = vec![
SymbolicValue::Symbol(crate::ssa::ir::SsaValue(0)),
SymbolicValue::ConcreteStr("<".into()),
SymbolicValue::ConcreteStr("<".into()),
];
let info = classify_string_method("s.gsub", &args, Lang::Ruby).unwrap();
match &info.method {
StringMethod::Replace { .. } => {}
other => panic!("expected Replace, got {:?}", other),
}
}
#[test]
fn test_classify_java_trim() {
let info = classify_string_method("s.trim", &[], Lang::Java).unwrap();
assert_eq!(info.method, StringMethod::Trim);
}
#[test]
fn test_classify_java_length() {
let info = classify_string_method("s.length", &[], Lang::Java).unwrap();
assert_eq!(info.method, StringMethod::StrLen);
}
#[test]
fn test_classify_go_trim_space() {
let info = classify_string_method("strings.TrimSpace", &[], Lang::Go).unwrap();
assert_eq!(info.method, StringMethod::Trim);
assert_eq!(info.operand_source, StringOperandSource::FirstArg);
}
#[test]
fn test_classify_go_to_lower() {
let info = classify_string_method("strings.ToLower", &[], Lang::Go).unwrap();
assert_eq!(info.method, StringMethod::ToLower);
assert_eq!(info.operand_source, StringOperandSource::FirstArg);
}
#[test]
fn test_classify_php_strtolower() {
let info = classify_string_method("strtolower", &[], Lang::Php).unwrap();
assert_eq!(info.method, StringMethod::ToLower);
assert_eq!(info.operand_source, StringOperandSource::FirstArg);
}
#[test]
fn test_classify_php_strlen() {
let info = classify_string_method("strlen", &[], Lang::Php).unwrap();
assert_eq!(info.method, StringMethod::StrLen);
}
#[test]
fn test_classify_rust_trim() {
let info = classify_string_method("s.trim", &[], Lang::Rust).unwrap();
assert_eq!(info.method, StringMethod::Trim);
}
#[test]
fn test_classify_c_strlen() {
let info = classify_string_method("strlen", &[], Lang::C).unwrap();
assert_eq!(info.method, StringMethod::StrLen);
}
#[test]
fn test_classify_unknown_method_returns_none() {
assert!(classify_string_method("foo.bar", &[], Lang::JavaScript).is_none());
assert!(classify_string_method("unknown", &[], Lang::Python).is_none());
}
#[test]
fn test_evaluate_trim() {
let result = evaluate_string_op_concrete(&StringMethod::Trim, " hello ");
assert_eq!(result, Some(SymbolicValue::ConcreteStr("hello".into())));
}
#[test]
fn test_evaluate_to_lower() {
let result = evaluate_string_op_concrete(&StringMethod::ToLower, "ABC");
assert_eq!(result, Some(SymbolicValue::ConcreteStr("abc".into())));
}
#[test]
fn test_evaluate_to_upper() {
let result = evaluate_string_op_concrete(&StringMethod::ToUpper, "abc");
assert_eq!(result, Some(SymbolicValue::ConcreteStr("ABC".into())));
}
#[test]
fn test_evaluate_replace() {
let method = StringMethod::Replace {
pattern: "<script>".into(),
replacement: "".into(),
};
let result = evaluate_string_op_concrete(&method, "a<script>b");
assert_eq!(result, Some(SymbolicValue::ConcreteStr("ab".into())));
}
#[test]
fn test_evaluate_strlen() {
let result = evaluate_string_op_concrete(&StringMethod::StrLen, "hello");
assert_eq!(result, Some(SymbolicValue::Concrete(5)));
}
#[test]
fn test_evaluate_substr_returns_none() {
let result = evaluate_string_op_concrete(&StringMethod::Substr, "hello");
assert_eq!(result, None);
}
#[test]
fn test_detect_xss_sanitizer() {
let info = detect_replace_sanitizer("<", "<", "s.replaceAll", Lang::JavaScript).unwrap();
assert!(info.sanitized_caps.contains(Cap::HTML_ESCAPE));
assert!(info.is_global);
}
#[test]
fn test_detect_xss_non_global() {
let info = detect_replace_sanitizer("<", "<", "s.replace", Lang::JavaScript).unwrap();
assert!(info.sanitized_caps.contains(Cap::HTML_ESCAPE));
assert!(!info.is_global);
}
#[test]
fn test_detect_sqli_sanitizer() {
let info = detect_replace_sanitizer("'", "''", "s.replace", Lang::Python).unwrap();
assert!(info.sanitized_caps.contains(Cap::SQL_QUERY));
assert!(info.is_global); }
#[test]
fn test_detect_cmdi_sanitizer() {
let info = detect_replace_sanitizer("|", "", "s.replace", Lang::Python).unwrap();
assert!(info.sanitized_caps.contains(Cap::SHELL_ESCAPE));
}
#[test]
fn test_detect_no_sanitizer_for_neutral_pattern() {
assert!(detect_replace_sanitizer("foo", "bar", "s.replace", Lang::JavaScript).is_none());
}
#[test]
fn test_global_replace_ruby_gsub() {
assert!(is_global_replace("s.gsub", Lang::Ruby));
assert!(!is_global_replace("s.sub", Lang::Ruby));
}
#[test]
fn test_global_replace_go() {
assert!(is_global_replace("strings.ReplaceAll", Lang::Go));
assert!(!is_global_replace("strings.Replace", Lang::Go));
}
#[test]
fn test_classify_transform_js_encode_uri_component() {
let info = classify_transform_method("encodeURIComponent", Lang::JavaScript).unwrap();
assert_eq!(info.kind, TransformKind::UrlEncode);
assert_eq!(info.operand_source, StringOperandSource::FirstArg);
}
#[test]
fn test_classify_transform_js_decode_uri_component() {
let info = classify_transform_method("decodeURIComponent", Lang::JavaScript).unwrap();
assert_eq!(info.kind, TransformKind::UrlDecode);
}
#[test]
fn test_classify_transform_js_btoa() {
let info = classify_transform_method("btoa", Lang::JavaScript).unwrap();
assert_eq!(info.kind, TransformKind::Base64Encode);
}
#[test]
fn test_classify_transform_js_atob() {
let info = classify_transform_method("atob", Lang::JavaScript).unwrap();
assert_eq!(info.kind, TransformKind::Base64Decode);
}
#[test]
fn test_classify_transform_js_he_encode() {
let info = classify_transform_method("he.encode", Lang::JavaScript).unwrap();
assert_eq!(info.kind, TransformKind::HtmlEscape);
}
#[test]
fn test_classify_transform_js_he_escape() {
let info = classify_transform_method("he.escape", Lang::TypeScript).unwrap();
assert_eq!(info.kind, TransformKind::HtmlEscape);
}
#[test]
fn test_classify_transform_js_rich_sanitizer_not_matched() {
assert!(classify_transform_method("DOMPurify.sanitize", Lang::JavaScript).is_none());
assert!(classify_transform_method("sanitizeHtml", Lang::JavaScript).is_none());
assert!(classify_transform_method("xss", Lang::JavaScript).is_none());
}
#[test]
fn test_classify_transform_python_html_escape() {
let info = classify_transform_method("html.escape", Lang::Python).unwrap();
assert_eq!(info.kind, TransformKind::HtmlEscape);
}
#[test]
fn test_classify_transform_python_shlex_quote() {
let info = classify_transform_method("shlex.quote", Lang::Python).unwrap();
assert_eq!(info.kind, TransformKind::ShellEscape);
}
#[test]
fn test_classify_transform_python_urllib_quote() {
let info = classify_transform_method("urllib.parse.quote", Lang::Python).unwrap();
assert_eq!(info.kind, TransformKind::UrlEncode);
}
#[test]
fn test_classify_transform_python_base64() {
let info = classify_transform_method("base64.b64encode", Lang::Python).unwrap();
assert_eq!(info.kind, TransformKind::Base64Encode);
let info = classify_transform_method("base64.b64decode", Lang::Python).unwrap();
assert_eq!(info.kind, TransformKind::Base64Decode);
}
#[test]
fn test_classify_transform_python_rich_sanitizer_not_matched() {
assert!(classify_transform_method("bleach.clean", Lang::Python).is_none());
assert!(classify_transform_method("markupsafe.escape", Lang::Python).is_none());
}
#[test]
fn test_classify_transform_php_htmlspecialchars() {
let info = classify_transform_method("htmlspecialchars", Lang::Php).unwrap();
assert_eq!(info.kind, TransformKind::HtmlEscape);
}
#[test]
fn test_classify_transform_php_urlencode() {
let info = classify_transform_method("urlencode", Lang::Php).unwrap();
assert_eq!(info.kind, TransformKind::UrlEncode);
}
#[test]
fn test_classify_transform_php_base64_encode() {
let info = classify_transform_method("base64_encode", Lang::Php).unwrap();
assert_eq!(info.kind, TransformKind::Base64Encode);
}
#[test]
fn test_classify_transform_php_escapeshellarg() {
let info = classify_transform_method("escapeshellarg", Lang::Php).unwrap();
assert_eq!(info.kind, TransformKind::ShellEscape);
}
#[test]
fn test_classify_transform_php_addslashes() {
let info = classify_transform_method("addslashes", Lang::Php).unwrap();
assert_eq!(info.kind, TransformKind::SqlEscape);
}
#[test]
fn test_classify_transform_unknown_returns_none() {
assert!(classify_transform_method("foobar", Lang::JavaScript).is_none());
assert!(classify_transform_method("unknown", Lang::Python).is_none());
assert!(classify_transform_method("blah", Lang::Php).is_none());
}
/// Checks Java `URLEncoder.encode` classification: the short callee yields
/// `UrlEncode` with the operand taken from the first argument, and the
/// fully-qualified `java.net.URLEncoder.encode` callee also yields `UrlEncode`.
#[test]
fn test_classify_transform_java_url_encoder() {
    let short_form = classify_transform_method("URLEncoder.encode", Lang::Java).unwrap();
    assert_eq!(
        (short_form.kind, short_form.operand_source),
        (TransformKind::UrlEncode, StringOperandSource::FirstArg)
    );

    let qualified_kind = classify_transform_method("java.net.URLEncoder.encode", Lang::Java)
        .unwrap()
        .kind;
    assert_eq!(qualified_kind, TransformKind::UrlEncode);
}
/// Checks that Java `URLDecoder.decode` is classified as a `UrlDecode` transform.
#[test]
fn test_classify_transform_java_url_decoder() {
    let classified = classify_transform_method("URLDecoder.decode", Lang::Java);
    assert_eq!(classified.unwrap().kind, TransformKind::UrlDecode);
}
/// Checks that both `StringEscapeUtils.escapeHtml4` and
/// `StringEscapeUtils.escapeXml11` are classified as `HtmlEscape` for Java.
#[test]
fn test_classify_transform_java_string_escape_utils() {
    for callee in [
        "StringEscapeUtils.escapeHtml4",
        "StringEscapeUtils.escapeXml11",
    ] {
        let classified = classify_transform_method(callee, Lang::Java).unwrap();
        assert_eq!(classified.kind, TransformKind::HtmlEscape);
    }
}
/// Checks Java Base64 classification: `Base64.getEncoder.encodeToString` maps to
/// `Base64Encode` and `Base64.getDecoder.decode` maps to `Base64Decode`.
#[test]
fn test_classify_transform_java_base64() {
    let expectations = [
        (
            "Base64.getEncoder.encodeToString",
            TransformKind::Base64Encode,
        ),
        ("Base64.getDecoder.decode", TransformKind::Base64Decode),
    ];
    for (callee, expected_kind) in expectations {
        let classified = classify_transform_method(callee, Lang::Java).unwrap();
        assert_eq!(classified.kind, expected_kind);
    }
}
/// Checks that Go `url.QueryEscape` is classified as `UrlEncode` and that its
/// operand comes from the first argument.
#[test]
fn test_classify_transform_go_url_query_escape() {
    let classified = classify_transform_method("url.QueryEscape", Lang::Go).unwrap();
    let (kind, source) = (classified.kind, classified.operand_source);
    assert_eq!(kind, TransformKind::UrlEncode);
    assert_eq!(source, StringOperandSource::FirstArg);
}
/// Checks that Go `url.PathEscape` is classified as a `UrlEncode` transform.
#[test]
fn test_classify_transform_go_url_path_escape() {
    let kind = classify_transform_method("url.PathEscape", Lang::Go)
        .unwrap()
        .kind;
    assert_eq!(kind, TransformKind::UrlEncode);
}
/// Checks that Go `html.EscapeString` is classified as an `HtmlEscape` transform.
#[test]
fn test_classify_transform_go_html_escape() {
    let classified = classify_transform_method("html.EscapeString", Lang::Go);
    assert_eq!(classified.unwrap().kind, TransformKind::HtmlEscape);
}
/// Checks that Go `base64.StdEncoding.EncodeToString` is classified as a
/// `Base64Encode` transform.
#[test]
fn test_classify_transform_go_base64() {
    let callee = "base64.StdEncoding.EncodeToString";
    let kind = classify_transform_method(callee, Lang::Go).unwrap().kind;
    assert_eq!(kind, TransformKind::Base64Encode);
}
/// Checks that Ruby `CGI.escape` is classified as `UrlEncode` under both the
/// `.` and the `::` call spellings.
#[test]
fn test_classify_transform_ruby_cgi_escape() {
    for callee in ["CGI.escape", "CGI::escape"] {
        let classified = classify_transform_method(callee, Lang::Ruby).unwrap();
        assert_eq!(classified.kind, TransformKind::UrlEncode);
    }
}
/// Checks that Ruby `CGI.unescape` is classified as a `UrlDecode` transform.
#[test]
fn test_classify_transform_ruby_cgi_unescape() {
    let classified = classify_transform_method("CGI.unescape", Lang::Ruby);
    assert_eq!(classified.unwrap().kind, TransformKind::UrlDecode);
}
/// Checks that Ruby `ERB::Util.html_escape` is classified as an `HtmlEscape`
/// transform.
#[test]
fn test_classify_transform_ruby_erb_html_escape() {
    let kind = classify_transform_method("ERB::Util.html_escape", Lang::Ruby)
        .unwrap()
        .kind;
    assert_eq!(kind, TransformKind::HtmlEscape);
}
/// Checks that Ruby `URI.encode_www_form_component` is classified as a
/// `UrlEncode` transform.
#[test]
fn test_classify_transform_ruby_uri_encode_form() {
    let callee = "URI.encode_www_form_component";
    let classified = classify_transform_method(callee, Lang::Ruby);
    assert_eq!(classified.unwrap().kind, TransformKind::UrlEncode);
}
/// Checks that Ruby `Base64.strict_encode64` is classified as a `Base64Encode`
/// transform.
#[test]
fn test_classify_transform_ruby_base64() {
    let kind = classify_transform_method("Base64.strict_encode64", Lang::Ruby)
        .unwrap()
        .kind;
    assert_eq!(kind, TransformKind::Base64Encode);
}
/// Checks that `sanitize` and `strip_tags` are not classified as transform
/// methods for Ruby (the classifier returns `None` for both).
#[test]
fn test_classify_transform_ruby_rich_sanitizer_not_matched() {
    for callee in ["sanitize", "strip_tags"] {
        assert!(classify_transform_method(callee, Lang::Ruby).is_none());
    }
}
/// Checks that unrecognised qualified callees yield `None` for Java, Go and Ruby.
#[test]
fn test_classify_transform_unknown_callee_returns_none_for_new_langs() {
    let unknown_callees = [
        ("com.example.Foo.bar", Lang::Java),
        ("mypkg.Quux", Lang::Go),
        ("MyClass.unknown_method", Lang::Ruby),
    ];
    for (callee, lang) in unknown_callees {
        assert!(classify_transform_method(callee, lang).is_none());
    }
}
/// HTML-escaping a script payload must replace its angle brackets with entities.
///
/// An expectation equal to the raw input would only pass for an encoder that
/// performs no escaping, so the markup characters are checked in entity form.
/// NOTE(review): the apostrophe is deliberately not pinned, because HTML escapers
/// differ between `&#39;` and `&#x27;`; tighten this to an exact `assert_eq!`
/// once the encoder's choice is confirmed.
#[test]
fn test_encode_concrete_html_escape() {
    let payload = "<script>alert('xss')</script>";
    let escaped = encode_concrete_for_witness(TransformKind::HtmlEscape, payload).unwrap();

    // No raw markup delimiters may survive escaping.
    assert!(
        !escaped.contains('<') && !escaped.contains('>'),
        "raw angle brackets survived escaping: {:?}",
        escaped
    );
    // Everything around the quoted argument is fixed by the `<`/`>` entities alone.
    assert!(
        escaped.starts_with("&lt;script&gt;alert("),
        "unexpected escaped prefix: {:?}",
        escaped
    );
    assert!(
        escaped.ends_with(")&lt;/script&gt;"),
        "unexpected escaped suffix: {:?}",
        escaped
    );
}
/// HTML-escaping must rewrite `&` and `<` as the `&amp;` and `&lt;` entities.
///
/// The expected value is spelled with entities; an expectation equal to the raw
/// input would only pass if no escaping took place.
#[test]
fn test_encode_concrete_html_escape_ampersand() {
    let escaped = encode_concrete_for_witness(TransformKind::HtmlEscape, "a & b < c").unwrap();
    assert_eq!(escaped, "a &amp; b &lt; c");
}
/// Checks that URL-encoding `hello world` produces `hello%20world`.
#[test]
fn test_encode_concrete_url_encode() {
    let encoded = encode_concrete_for_witness(TransformKind::UrlEncode, "hello world").unwrap();
    assert_eq!(encoded, "hello%20world");
}
/// Checks that URL-encoding percent-encodes `=` and `&`:
/// `a=b&c=d` becomes `a%3Db%26c%3Dd`.
#[test]
fn test_encode_concrete_url_encode_special_chars() {
    let raw_query = "a=b&c=d";
    let encoded = encode_concrete_for_witness(TransformKind::UrlEncode, raw_query).unwrap();
    assert_eq!(encoded, "a%3Db%26c%3Dd");
}
/// Checks that shell-escaping `hello world` wraps it in single quotes.
#[test]
fn test_encode_concrete_shell_escape() {
    let quoted = encode_concrete_for_witness(TransformKind::ShellEscape, "hello world").unwrap();
    assert_eq!(quoted, "'hello world'");
}
/// Checks shell-escaping of an input containing a single quote: `it's` becomes
/// `'it'\''s'` (the embedded quote is emitted as `'\''`).
#[test]
fn test_encode_concrete_shell_escape_with_quotes() {
    let quoted = encode_concrete_for_witness(TransformKind::ShellEscape, "it's").unwrap();
    assert_eq!(quoted, "'it'\\''s'");
}
/// Checks that SQL-escaping doubles a single quote: `O'Brien` becomes `O''Brien`.
#[test]
fn test_encode_concrete_sql_escape() {
    let escaped = encode_concrete_for_witness(TransformKind::SqlEscape, "O'Brien").unwrap();
    assert_eq!(escaped, "O''Brien");
}
/// Checks that Base64-encoding `hello` produces the padded form `aGVsbG8=`.
#[test]
fn test_encode_concrete_base64() {
    let encoded = encode_concrete_for_witness(TransformKind::Base64Encode, "hello").unwrap();
    assert_eq!(encoded, "aGVsbG8=");
}
/// Checks that Base64-encoding and then Base64-decoding `test123` returns the
/// original text.
#[test]
fn test_encode_concrete_base64_roundtrip() {
    let original = "test123";
    let wire_form = encode_concrete_for_witness(TransformKind::Base64Encode, original).unwrap();
    let roundtripped =
        decode_concrete_for_witness(TransformKind::Base64Decode, &wire_form).unwrap();
    assert_eq!(roundtripped, original);
}
/// Checks that URL-decoding `hello%20world` produces `hello world`.
#[test]
fn test_decode_concrete_url_decode() {
    let decoded = decode_concrete_for_witness(TransformKind::UrlDecode, "hello%20world").unwrap();
    assert_eq!(decoded, "hello world");
}
/// Checks that URL-decoding treats `+` as a space: `hello+world` becomes
/// `hello world`.
#[test]
fn test_decode_concrete_url_decode_plus() {
    let decoded = decode_concrete_for_witness(TransformKind::UrlDecode, "hello+world").unwrap();
    assert_eq!(decoded, "hello world");
}
/// Checks that `HtmlEscape` verifies `Cap::HTML_ESCAPE` and is reported as
/// protective.
#[test]
fn test_verified_cap_html_escape() {
    let kind = TransformKind::HtmlEscape;
    assert_eq!(kind.verified_cap(), Cap::HTML_ESCAPE);
    assert!(kind.is_protective());
}
/// Checks that `UrlEncode` verifies `Cap::URL_ENCODE` and is reported as
/// protective.
#[test]
fn test_verified_cap_url_encode() {
    let kind = TransformKind::UrlEncode;
    assert_eq!(kind.verified_cap(), Cap::URL_ENCODE);
    assert!(kind.is_protective());
}
/// Checks that `ShellEscape` verifies `Cap::SHELL_ESCAPE` and is reported as
/// protective.
#[test]
fn test_verified_cap_shell_escape() {
    let kind = TransformKind::ShellEscape;
    assert_eq!(kind.verified_cap(), Cap::SHELL_ESCAPE);
    assert!(kind.is_protective());
}
/// Checks that `SqlEscape` verifies no capability (`Cap::empty()`) and is
/// therefore not reported as protective.
#[test]
fn test_verified_cap_sql_escape_is_empty() {
    let kind = TransformKind::SqlEscape;
    assert_eq!(kind.verified_cap(), Cap::empty());
    assert!(!kind.is_protective());
}
/// Checks that `Base64Encode` verifies no capability (`Cap::empty()`) and is
/// therefore not reported as protective.
#[test]
fn test_verified_cap_base64_is_empty() {
    let kind = TransformKind::Base64Encode;
    assert_eq!(kind.verified_cap(), Cap::empty());
    assert!(!kind.is_protective());
}
/// Checks that `UrlDecode` verifies no capability (`Cap::empty()`) and is
/// therefore not reported as protective.
#[test]
fn test_verified_cap_url_decode_is_empty() {
    let kind = TransformKind::UrlDecode;
    assert_eq!(kind.verified_cap(), Cap::empty());
    assert!(!kind.is_protective());
}
}