bext-waf 0.2.0

Web Application Firewall for bext — rate limiting, IP filtering, GeoIP, rule engine
Documentation
//! Cross-site scripting (XSS) detection — script tags, event handlers,
//! javascript: URIs, data: URIs, expression() CSS, and encoded variants.

use std::sync::OnceLock;

use regex::RegexSet;

/// Process-wide cache for the compiled XSS pattern set.
///
/// Starts empty; `patterns()` fills it on first use via `get_or_init` and
/// hands out the same `RegexSet` on every later call.
static XSS_PATTERNS: OnceLock<RegexSet> = OnceLock::new();

/// Human-readable rule descriptions returned by `check_xss`.
///
/// Entry `i` describes the regex at index `i` in the array built by
/// `patterns()`; the two tables must be kept in the same order and length.
/// `check_xss` falls back to the generic text "XSS attack" when an index has
/// no entry here.
static XSS_DESCRIPTIONS: &[&str] = &[
    // 0
    "Script tag injection",
    // 1
    "Script close tag",
    // 2
    "Event handler: onload",
    // 3
    "Event handler: onerror",
    // 4
    "Event handler: onclick/onmouseover/onfocus",
    // 5
    // NOTE(review): the regex at index 5 also covers change, submit, blur,
    // input, begin, end, abort, animationend, animationstart and toggle;
    // this text names only the first four.
    "Event handler: onmouseenter/onkeydown/onkeyup/onkeypress",
    // 6
    "javascript: URI scheme",
    // 7
    "data: URI scheme (text/html)",
    // 8
    // NOTE(review): the regex at index 8 accepts any `on<word>=` attribute
    // inside an <svg> tag, not only onload.
    "SVG onload injection",
    // 9
    "IFRAME injection",
    // 10
    // NOTE(review): the regex at index 10 looks for an `on<word>=` attribute
    // inside an <img> tag; it does not inspect `src`. Wording may mislead.
    "IMG src with script",
    // 11
    "CSS expression() injection",
    // 12
    "vbscript: URI scheme",
    // 13
    "HTML entity encoded script",
    // 14
    // NOTE(review): the regex at index 14 covers load, error, click,
    // mouseover, focus, mouseenter, keydown and begin, not only onload.
    "Event handler with whitespace evasion (on[ws]load)",
    // 15
    "UTF-7 encoded script tag (+ADw-script)",
    // 16 (the literal below renders as `\x3c`)
    "Backslash hex-encoded tag (\\x3c)",
    // 17
    "Script tag with whitespace break (<scri pt>)",
    // 18
    "Unicode fullwidth angle bracket script tag",
];

/// Returns the shared XSS [`RegexSet`], compiling it the first time it is
/// requested and reusing the cached instance afterwards.
///
/// The position of each rule in the array is significant: it is the index
/// `check_xss` uses to look up the matching entry in `XSS_DESCRIPTIONS`.
/// Every rule is case-insensitive (`(?i)`).
///
/// # Panics
/// Panics on first use if any rule fails to compile.
fn patterns() -> &'static RegexSet {
    XSS_PATTERNS.get_or_init(|| {
        let rules = [
            // [0] opening <script ...> tag, attributes allowed
            r"(?i)<\s*script\b[^>]*>",
            // [1] closing </script> tag
            r"(?i)<\s*/\s*script\s*>",
            // [2] onload= attribute
            r"(?i)\bon(load)\s*=",
            // [3] onerror= attribute
            r"(?i)\bon(error)\s*=",
            // [4] onclick= / onmouseover= / onfocus= attribute
            r"(?i)\bon(click|mouseover|focus)\s*=",
            // [5] further on*= attributes (mouse, keyboard, form, animation, ...)
            r"(?i)\bon(mouseenter|keydown|keyup|keypress|change|submit|blur|input|begin|end|abort|animationend|animationstart|toggle)\s*=",
            // [6] javascript: scheme
            r"(?i)javascript\s*:",
            // [7] data: scheme carrying text/html
            r"(?i)data\s*:\s*text/html",
            // [8] <svg ...> tag carrying any on<word>= attribute
            r"(?i)<\s*svg\b[^>]*\bon\w+\s*=",
            // [9] opening <iframe
            r"(?i)<\s*iframe\b",
            // [10] <img ...> tag carrying any on<word>= attribute
            r"(?i)<\s*img\b[^>]*\bon\w+\s*=",
            // [11] CSS expression( call
            r"(?i)expression\s*\(",
            // [12] vbscript: scheme
            r"(?i)vbscript\s*:",
            // [13] "<script" with the bracket written as an HTML entity
            r"(?i)(&lt;|&#60;|&#x3c;)\s*script",
            // [14] handler name split after "on" by tab / newline / CR / form feed
            r"(?i)\bon[\t\n\r\x0c]+(load|error|click|mouseover|focus|mouseenter|keydown|begin)\s*=",
            // [15] "<script" with the bracket written in UTF-7 (+ADw-)
            r"(?i)\+ADw-\s*script",
            // [16] "<script" with the bracket written as \x3c or \u003c
            r"(?i)(\\x3c|\\u003c)\s*script",
            // [17] "script" broken up by whitespace or NUL between "scri" and "pt"
            r"(?i)<\s*scri[\s\x00]+pt\b",
            // [18] fullwidth less-than sign (U+FF1C) followed by "script"
            r"(?i)\x{ff1c}\s*script",
        ];

        RegexSet::new(rules).expect("XSS regex patterns must compile")
    })
}

/// Check an input string for XSS patterns.
///
/// The input is run against the compiled pattern set from `patterns()`. When
/// several rules match, only the first index yielded by the set's match
/// iterator is reported.
///
/// # Returns
/// * `Some(description)` — the `XSS_DESCRIPTIONS` entry for the reported
///   rule, or the generic text `"XSS attack"` if that index has no entry.
/// * `None` — no rule matched.
pub fn check_xss(input: &str) -> Option<String> {
    // Only the first match is ever used, so pull it straight off the iterator
    // rather than collecting every matching index into a temporary Vec.
    patterns().matches(input).into_iter().next().map(|idx| {
        XSS_DESCRIPTIONS
            .get(idx)
            .copied()
            .unwrap_or("XSS attack")
            .to_string()
    })
}

/// Unit tests for `check_xss`: one positive case per rule family, a set of
/// benign inputs that must not be flagged, and consistency checks between the
/// pattern table and the description table.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Positive detections ----

    #[test]
    fn detects_script_tag() {
        assert!(check_xss("<script>alert('xss')</script>").is_some());
        assert!(check_xss("<SCRIPT SRC=http://evil.com/xss.js></SCRIPT>").is_some());
    }

    #[test]
    fn detects_script_with_attributes() {
        assert!(check_xss(r#"<script type="text/javascript">alert(1)</script>"#).is_some());
    }

    #[test]
    fn detects_onload() {
        assert!(check_xss(r#"<body onload=alert('XSS')>"#).is_some());
    }

    #[test]
    fn detects_onerror() {
        assert!(check_xss(r#"<img src=x onerror=alert(1)>"#).is_some());
    }

    #[test]
    fn detects_onclick() {
        assert!(check_xss(r#"<div onclick=alert(1)>click</div>"#).is_some());
    }

    #[test]
    fn detects_onmouseover() {
        assert!(check_xss(r#"<a onmouseover=alert(1)>hover</a>"#).is_some());
    }

    #[test]
    fn detects_onfocus() {
        assert!(check_xss(r#"<input onfocus=alert(1) autofocus>"#).is_some());
    }

    #[test]
    fn detects_javascript_uri() {
        assert!(check_xss("javascript:alert(document.cookie)").is_some());
        assert!(check_xss("JAVASCRIPT : void(0)").is_some());
    }

    #[test]
    fn detects_data_uri() {
        assert!(check_xss("data:text/html,<script>alert(1)</script>").is_some());
    }

    #[test]
    fn detects_svg_onload() {
        assert!(check_xss(r#"<svg onload=alert(1)>"#).is_some());
    }

    #[test]
    fn detects_iframe() {
        assert!(check_xss(r#"<iframe src="http://evil.com"></iframe>"#).is_some());
    }

    #[test]
    fn detects_img_event() {
        assert!(check_xss(r#"<img src=x onerror=alert(1)>"#).is_some());
    }

    #[test]
    fn detects_css_expression() {
        assert!(check_xss("background: expression(alert(1))").is_some());
    }

    #[test]
    fn detects_vbscript() {
        assert!(check_xss("vbscript:MsgBox(1)").is_some());
    }

    #[test]
    fn detects_entity_encoded() {
        assert!(check_xss("&lt;script&gt;alert(1)&lt;/script&gt;").is_some());
        assert!(check_xss("&#60;script>alert(1)</script>").is_some());
    }

    // Handler name split after "on" by a control-whitespace character.
    #[test]
    fn detects_handler_whitespace_evasion() {
        assert!(check_xss("<body on\tload=alert(1)>").is_some());
        assert!(check_xss("<img src=x on\nerror=alert(1)>").is_some());
    }

    // Opening bracket written as the UTF-7 sequence +ADw-.
    #[test]
    fn detects_utf7_script() {
        assert!(check_xss("+ADw-script+AD4-alert(1)+ADw-/script+AD4-").is_some());
    }

    // Opening bracket written as a literal backslash escape.
    #[test]
    fn detects_backslash_encoded_script() {
        assert!(check_xss(r"\x3cscript\x3ealert(1)").is_some());
        assert!(check_xss(r"\u003cscript\u003ealert(1)").is_some());
    }

    // "script" broken by whitespace inside the tag name.
    #[test]
    fn detects_broken_script_tag() {
        assert!(check_xss("<scri pt>alert(1)</scri pt>").is_some());
    }

    // Fullwidth less-than sign (U+FF1C) standing in for '<'.
    #[test]
    fn detects_fullwidth_bracket_script() {
        assert!(check_xss("\u{ff1c}script\u{ff1e}alert(1)").is_some());
    }

    // Polyglot payload
    #[test]
    fn detects_polyglot() {
        let payload = r#"jaVasCript:/*-/*`/*\`/*'/*"/**/(/* */oNcliCk=alert() )//%0D%0A%0d%0a//</stYle/</titLe/</teXtarEa/</scRipt/--!>\x3csVg/<sVg/oNloAd=alert()//>\x3e"#;
        assert!(check_xss(payload).is_some());
    }

    // ---- Table consistency ----

    // Every pattern index must have a description, otherwise `check_xss`
    // silently falls back to the generic text.
    #[test]
    fn every_pattern_has_a_description() {
        assert_eq!(patterns().len(), XSS_DESCRIPTIONS.len());
    }

    // The returned text is the description at the matching rule's index.
    #[test]
    fn reports_description_of_matching_rule() {
        assert_eq!(
            check_xss("<script>alert(1)</script>").as_deref(),
            Some("Script tag injection")
        );
        assert_eq!(
            check_xss("<iframe src=x>").as_deref(),
            Some("IFRAME injection")
        );
    }

    // ---- False-positive checks ----

    #[test]
    fn allows_empty_input() {
        assert!(check_xss("").is_none());
    }

    #[test]
    fn allows_normal_html_content() {
        assert!(check_xss("This is a <b>bold</b> statement").is_none());
    }

    #[test]
    fn allows_normal_text() {
        assert!(check_xss("Hello world, welcome to our site!").is_none());
    }

    #[test]
    fn allows_css_properties() {
        assert!(check_xss("color: red; font-size: 14px").is_none());
    }

    #[test]
    fn allows_normal_url() {
        assert!(check_xss("https://example.com/page?q=test").is_none());
    }

    #[test]
    fn allows_normal_json() {
        assert!(check_xss(r#"{"key": "value", "count": 42}"#).is_none());
    }

    #[test]
    fn allows_angle_brackets_in_math() {
        assert!(check_xss("a < b && c > d").is_none());
    }
}