use super::DetectionResult;
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use once_cell::sync::Lazy;
use regex::{RegexSet, Regex};
/// Case-insensitive regular expressions describing XSS indicators.
///
/// All patterns are compiled together into one [`RegexSet`] the first time the
/// static is dereferenced; `check_patterns` only asks whether *any* of them
/// matches.
static XSS_REGEX_SET: Lazy<RegexSet> = Lazy::new(|| {
    const XSS_PATTERNS: &[&str] = &[
        // Script elements (opening and closing tags).
        r"(?i)<script[^>]*>",
        r"(?i)</script>",
        // Any `on<name> =` attribute assignment.
        r"(?i)\bon\w+\s*=",
        // URL schemes that can carry script or HTML.
        r"(?i)javascript\s*:",
        r"(?i)data\s*:\s*text/html",
        r"(?i)vbscript\s*:",
        // Style-sheet constructs.
        r"(?i)expression\s*\(",
        r"(?i)behavior\s*:",
        r"(?i)-moz-binding\s*:",
        r#"(?i)url\s*\(\s*["']?\s*javascript"#,
        r"(?i)@import",
        // Embedding, navigation and redirect elements.
        r"(?i)<iframe",
        r"(?i)<(?:object|embed|applet)",
        r#"(?i)<form[^>]*action\s*=\s*["']?\s*javascript"#,
        r#"(?i)<a[^>]*href\s*=\s*["']?\s*javascript"#,
        r#"(?i)<meta[^>]*http-equiv\s*=\s*["']?refresh"#,
        r"(?i)<base[^>]*href",
        r"(?i)fscommand",
        // JavaScript evaluation and DOM access.
        r"(?i)\beval\s*\(",
        r#"(?i)(?:setTimeout|setInterval)\s*\(\s*["']"#,
        r"(?i)document\s*\.\s*write",
        r"(?i)\.(?:innerHTML|outerHTML)\s*=",
        r"(?i)document\s*\.\s*location",
        r"(?i)window\s*\.\s*location",
        r"(?i)document\s*\.\s*cookie",
    ];

    RegexSet::new(XSS_PATTERNS).expect("XSS regex patterns should compile")
});
/// ASCII-case-insensitive literal matcher for opening-tag prefixes.
///
/// Each entry is matched as a plain substring, so `"<form"` also matches any
/// longer tag name that starts with `form`.
static DANGEROUS_TAGS_AC: Lazy<AhoCorasick> = Lazy::new(|| {
    const TAG_PREFIXES: &[&str] = &[
        "<script", "<iframe", "<object", "<embed", "<applet",
        "<form", "<input", "<button", "<select", "<textarea",
        "<link", "<style", "<meta", "<base",
        "<svg", "<math",
        "<video", "<audio", "<source", "<track", "<canvas",
        "<frame", "<frameset", "<layer", "<ilayer",
        "<bgsound", "<isindex", "<marquee", "<blink",
        "<plaintext", "<listing", "<xmp",
        "<noscript", "<template", "<slot", "<portal",
        "<img", "<body",
    ];

    let mut builder = AhoCorasickBuilder::new();
    builder
        .ascii_case_insensitive(true)
        .match_kind(MatchKind::LeftmostFirst);
    builder
        .build(TAG_PREFIXES)
        .expect("Dangerous tags AC should build")
});
/// ASCII-case-insensitive literal matcher for `on<event>=` attribute names.
///
/// Entries include the trailing `=`, so a handler name only matches when the
/// equals sign follows it directly (no whitespace in between).
static EVENT_HANDLERS_AC: Lazy<AhoCorasick> = Lazy::new(|| {
    const HANDLER_ATTRS: &[&str] = &[
        // a
        "onabort=", "onafterprint=", "onanimationend=", "onanimationiteration=",
        "onanimationstart=",
        // b
        "onbeforeprint=", "onbeforeunload=", "onblur=",
        // c
        "oncanplay=", "oncanplaythrough=", "onchange=", "onclick=", "oncontextmenu=",
        "oncopy=", "oncut=",
        // d
        "ondblclick=", "ondrag=", "ondragend=", "ondragenter=", "ondragleave=",
        "ondragover=", "ondragstart=", "ondrop=", "ondurationchange=",
        // e
        "onemptied=", "onended=", "onerror=",
        // f
        "onfocus=", "onfocusin=", "onfocusout=",
        // h, i
        "onhashchange=", "oninput=", "oninvalid=",
        // k
        "onkeydown=", "onkeypress=", "onkeyup=",
        // l
        "onload=", "onloadeddata=", "onloadedmetadata=", "onloadstart=",
        // m
        "onmessage=", "onmousedown=", "onmouseenter=", "onmouseleave=",
        "onmousemove=", "onmouseout=", "onmouseover=", "onmouseup=", "onmousewheel=",
        // o
        "onoffline=", "ononline=", "onopen=",
        // p
        "onpagehide=", "onpageshow=", "onpaste=", "onpause=", "onplay=",
        "onplaying=", "onpopstate=", "onprogress=",
        // r
        "onratechange=", "onreset=", "onresize=",
        // s
        "onscroll=", "onsearch=", "onseeked=", "onseeking=", "onselect=",
        "onshow=", "onstalled=", "onstorage=", "onsubmit=", "onsuspend=",
        // t
        "ontimeupdate=", "ontoggle=", "ontouchcancel=", "ontouchend=",
        "ontouchmove=", "ontouchstart=", "ontransitionend=",
        // u, v, w
        "onunload=", "onvolumechange=", "onwaiting=", "onwheel=",
        // Pointer events.
        "onpointerdown=", "onpointermove=", "onpointerup=", "onpointercancel=",
        "onpointerenter=", "onpointerleave=", "onpointerover=", "onpointerout=",
        "ongotpointercapture=", "onlostpointercapture=",
        // Remaining handlers.
        "onbeforeinput=", "onformdata=", "onsecuritypolicyviolation=",
        "onslotchange=", "onvisibilitychange=",
    ];

    let mut builder = AhoCorasickBuilder::new();
    builder
        .ascii_case_insensitive(true)
        .match_kind(MatchKind::LeftmostFirst);
    builder
        .build(HANDLER_ATTRS)
        .expect("Event handlers AC should build")
});
/// ASCII-case-insensitive literal matcher for script-bearing URL schemes.
///
/// Every entry includes the trailing `:`, so the scheme name alone does not
/// match.
static DANGEROUS_SCHEMES_AC: Lazy<AhoCorasick> = Lazy::new(|| {
    const SCHEME_PREFIXES: &[&str] = &["javascript:", "vbscript:", "livescript:", "mocha:"];

    let mut builder = AhoCorasickBuilder::new();
    builder
        .ascii_case_insensitive(true)
        .match_kind(MatchKind::LeftmostFirst);
    builder
        .build(SCHEME_PREFIXES)
        .expect("Dangerous schemes AC should build")
});
/// Cheap literal prefilter consulted by `detect_xss` before any other check.
///
/// Input that contains none of these literals is reported as safe without
/// running the regex set, the other matchers or `normalize_input`. The list
/// therefore has to contain a literal fragment of everything those later
/// stages can flag; otherwise the corresponding detection is unreachable.
/// It covers three groups:
///
/// * fragments required by the patterns in `XSS_REGEX_SET`,
///   `DANGEROUS_TAGS_AC`, `EVENT_HANDLERS_AC` and `DANGEROUS_SCHEMES_AC`;
/// * the lead characters of the encodings `normalize_input` undoes
///   (`&` for HTML entities, `%` for percent-encoding, NUL bytes);
/// * escaped spellings of `<` that were already part of the prefilter.
///
/// Matching is ASCII-case-insensitive.
static QUICK_CHECK_AC: Lazy<AhoCorasick> = Lazy::new(|| {
    let patterns = [
        // Every tag prefix and every tag-based regex starts with `<`.
        "<",
        // URL schemes (`data` is needed for the `data:text/html` pattern).
        "javascript", "vbscript", "livescript", "mocha", "data",
        // Present in every `on<event>=` handler; also covers `expression(`.
        "on",
        // JavaScript evaluation and DOM access. `document` / `window` are
        // listed without the trailing dot because the regexes tolerate
        // whitespace before it.
        "eval", "settimeout", "setinterval", "innerhtml", "outerhtml",
        "document", "window",
        // Remaining literal patterns from the regex set.
        "behavior", "-moz-binding", "@import", "fscommand",
        // Encoded input must reach `normalize_input`; `&` and `%` subsume the
        // more specific `&lt;` / `%3c` spellings.
        "&", "%", "\0",
        // NOTE(review): nothing in this file decodes these escapes; they are
        // kept so the set of inputs passing the prefilter does not shrink.
        "\\x3c", "\\u003c",
    ];
    AhoCorasickBuilder::new()
        .ascii_case_insensitive(true)
        .match_kind(MatchKind::LeftmostFirst)
        .build(&patterns)
        .expect("Quick check AC should build")
});
/// Matches a `<` followed by one or more whitespace characters; used by
/// `normalize_input` to collapse such a run back to a bare `<`.
static NORMALIZE_TAG_WS: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"<\s+").expect("Tag whitespace regex should compile"));
/// Matches an `=` together with any whitespace on either side; used by
/// `normalize_input` to rewrite the whole span to a bare `=`.
static NORMALIZE_ATTR_WS: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\s*=\s*").expect("Attribute whitespace regex should compile"));
/// Returns `true` when [`detect_xss`] classifies `input` as an injection.
///
/// Convenience wrapper for callers that only need the boolean verdict.
#[inline]
pub fn is_xss(input: &str) -> bool {
    let verdict = detect_xss(input);
    verdict.is_injection
}
/// Scans `input` for XSS indicators and reports the first one found.
///
/// The check runs in up to three stages:
/// 1. a literal prefilter (`QUICK_CHECK_AC`); input that misses it is
///    reported as safe immediately;
/// 2. the pattern checks on the raw input;
/// 3. the same pattern checks on the output of `normalize_input`, but only
///    when normalisation actually changed the text.
///
/// Returns `DetectionResult::safe()` when no stage reports a hit.
pub fn detect_xss(input: &str) -> DetectionResult {
    if !QUICK_CHECK_AC.is_match(input) {
        return DetectionResult::safe();
    }

    check_patterns(input)
        .or_else(|| {
            // Only reached when the raw text produced no hit.
            let normalized = normalize_input(input);
            if normalized == input {
                None
            } else {
                check_patterns(&normalized)
            }
        })
        .unwrap_or_else(DetectionResult::safe)
}
/// Runs every matcher against `input` and returns a detection for the first
/// one that hits, or `None` when none of them match.
///
/// Matchers are tried in a fixed order — regex set, tag prefixes, event
/// handlers, URL schemes — and the reason string names the matcher that hit.
#[inline]
fn check_patterns(input: &str) -> Option<DetectionResult> {
    let reason = if XSS_REGEX_SET.is_match(input) {
        "XSS pattern match"
    } else if DANGEROUS_TAGS_AC.is_match(input) {
        "Dangerous HTML tag"
    } else if EVENT_HANDLERS_AC.is_match(input) {
        "Event handler"
    } else if DANGEROUS_SCHEMES_AC.is_match(input) {
        "Dangerous URL scheme"
    } else {
        return None;
    };

    Some(DetectionResult::detected(reason.to_string()))
}
/// Strips common obfuscation layers from `input` so the pattern checks can
/// see the underlying payload.
///
/// The steps run once each, in this order:
/// 1. decode HTML entities;
/// 2. percent-decode, replacing byte sequences that are not valid UTF-8 with
///    U+FFFD instead of giving up on the whole string;
/// 3. remove NUL characters;
/// 4. drop whitespace that directly follows a `<`;
/// 5. drop whitespace on either side of an `=`.
///
/// Returns the normalised text; it equals `input` when no step applied.
fn normalize_input(input: &str) -> String {
    let mut text = input.to_string();

    let entity_decoded = html_escape::decode_html_entities(&text);
    if entity_decoded != text {
        text = entity_decoded.into_owned();
    }

    // Lossy on purpose: with a strict UTF-8 decode, one stray escape such as
    // `%FF` anywhere in the input would make decoding fail and leave every
    // other percent-escape (e.g. `%3Cscript%3E`) untouched.
    let percent_decoded = percent_encoding::percent_decode_str(&text).decode_utf8_lossy();
    if percent_decoded != text {
        text = percent_decoded.into_owned();
    }

    text.retain(|c| c != '\0');

    // Guard with the regexes themselves so that tabs, newlines and other
    // whitespace are handled, not just an ASCII space.
    if NORMALIZE_TAG_WS.is_match(&text) {
        text = NORMALIZE_TAG_WS.replace_all(&text, "<").into_owned();
    }
    // The attribute pattern also matches a bare `=`; in that case the rewrite
    // leaves the text unchanged.
    if NORMALIZE_ATTR_WS.is_match(&text) {
        text = NORMALIZE_ATTR_WS.replace_all(&text, "=").into_owned();
    }

    text
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every payload in `payloads` is reported as XSS.
    fn assert_flagged(payloads: &[&str]) {
        for payload in payloads {
            assert!(is_xss(payload), "expected XSS detection for {:?}", payload);
        }
    }

    /// Asserts that none of the inputs in `inputs` is reported as XSS.
    fn assert_clean(inputs: &[&str]) {
        for text in inputs {
            assert!(!is_xss(text), "unexpected XSS detection for {:?}", text);
        }
    }

    #[test]
    fn test_script_tag() {
        assert_flagged(&[
            "<script>alert(1)</script>",
            "<SCRIPT>alert(1)</SCRIPT>",
            "<script src=evil.js>",
            "<script/src=evil.js>",
        ]);
    }

    #[test]
    fn test_event_handlers() {
        assert_flagged(&[
            "<img src=x onerror=alert(1)>",
            "<body onload=alert(1)>",
            "<svg onload=alert(1)>",
            "<input onfocus=alert(1) autofocus>",
        ]);
    }

    #[test]
    fn test_javascript_url() {
        assert_flagged(&[
            "<a href=javascript:alert(1)>click</a>",
            "<a href=\"javascript:alert(1)\">click</a>",
            "javascript:alert(document.cookie)",
        ]);
    }

    #[test]
    fn test_encoded_xss() {
        assert_flagged(&[
            "%3Cscript%3Ealert(1)%3C/script%3E",
            // NOTE(review): this payload is not encoded; possibly meant to be
            // the HTML-entity form — confirm.
            "<script>alert(1)</script>",
        ]);
    }

    #[test]
    fn test_svg_xss() {
        assert_flagged(&[
            "<svg onload=alert(1)>",
            "<svg><script>alert(1)</script></svg>",
        ]);
    }

    #[test]
    fn test_iframe() {
        assert_flagged(&[
            "<iframe src=javascript:alert(1)>",
            "<iframe src=\"evil.com\">",
        ]);
    }

    #[test]
    fn test_safe_input() {
        assert_clean(&[
            "hello world",
            "This is normal text without any special characters",
            "12345",
            "user@example.com",
        ]);
    }

    #[test]
    fn test_dom_xss() {
        assert_flagged(&[
            "document.write('<script>')",
            "element.innerHTML = userInput",
            "eval('malicious code')",
        ]);
    }

    #[test]
    fn test_quick_reject() {
        assert_clean(&["hello world", "just some text", "numbers 12345"]);
    }
}