use regex::Regex;
use std::sync::OnceLock;
/// Escapes the characters that are significant in HTML text content.
///
/// The following substitutions are applied in a single pass, so text that
/// already contains an entity is escaped again rather than passed through
/// (`&amp;` becomes `&amp;amp;`):
///
/// | input | output   |
/// |-------|----------|
/// | `&`   | `&amp;`  |
/// | `<`   | `&lt;`   |
/// | `>`   | `&gt;`   |
/// | `"`   | `&quot;` |
/// | `'`   | `&#x27;` |
///
/// Every other character is copied unchanged. An empty input yields an empty
/// string.
pub fn escape_html(input: &str) -> String {
    // Escaping only ever grows the text, so the input length is a useful
    // lower bound for the output buffer.
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            // Numeric reference: `&apos;` is not defined in HTML 4.
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Escapes a string for use inside a quoted HTML attribute value.
///
/// Applies the same five substitutions as plain HTML escaping (`&`, `<`, `>`,
/// `"`, `'`) and additionally rewrites line breaks as numeric character
/// references (`\n` to `&#10;`, `\r` to `&#13;`), so the attribute stays on
/// one source line while the value keeps its line breaks.
///
/// The result is only meant for *quoted* attribute values; this function does
/// not escape spaces or other characters that would end an unquoted value.
pub fn escape_html_attr(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        let replacement = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            '\'' => "&#x27;",
            '\n' => "&#10;",
            '\r' => "&#13;",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
/// Escapes a string for embedding inside a quoted JavaScript string literal.
///
/// Substitutions, applied in a single pass:
///
/// * `\` becomes `\\`, `'` becomes `\'`, `"` becomes `\"`
/// * newline, carriage return and tab become `\n`, `\r`, `\t`
/// * `<` and `>` become `\x3C` and `\x3E`, and `/` becomes `\/`, so the output
///   never contains a literal `</script>` or `<!--`
/// * U+2028 and U+2029 become `\u2028` and `\u2029`; both are line terminators
///   in JavaScript source before ES2019 and would otherwise end the literal
///
/// All other characters are copied unchanged. The caller still has to supply
/// the surrounding quotes. Backticks are not escaped, so the output is not
/// suitable for template literals.
pub fn escape_javascript(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        let replacement = match ch {
            '\\' => "\\\\",
            '\'' => "\\'",
            '"' => "\\\"",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            '<' => "\\x3C",
            '>' => "\\x3E",
            '/' => "\\/",
            '\u{2028}' => "\\u2028",
            '\u{2029}' => "\\u2029",
            other => {
                escaped.push(other);
                continue;
            }
        };
        escaped.push_str(replacement);
    }
    escaped
}
/// Percent-encodes `input` with `urlencoding::encode` and returns the result
/// as an owned `String`.
pub fn escape_url(input: &str) -> String {
    let encoded = urlencoding::encode(input);
    encoded.to_string()
}
/// Process-wide cache of the compiled XSS heuristics; filled on first use.
static DANGEROUS_PATTERNS: OnceLock<Vec<Regex>> = OnceLock::new();

/// Returns the compiled XSS heuristics, compiling them on the first call.
///
/// All patterns are case-insensitive and unanchored, so they match anywhere
/// in the input.
///
/// NOTE(review): `on\w+\s*=` has no word boundary and therefore also matches
/// inside ordinary text such as `condition =`; confirm that this level of
/// false positives is acceptable to callers.
fn get_dangerous_patterns() -> &'static Vec<Regex> {
    // Pattern sources, in the order they are tried.
    const PATTERN_SOURCES: [&str; 6] = [
        r"(?i)javascript:",
        r"(?i)data:text/html",
        r"(?i)vbscript:",
        r"(?i)on\w+\s*=",
        r"(?i)<(iframe|embed|object|applet|meta|link|base)",
        r"(?i)<script",
    ];

    DANGEROUS_PATTERNS.get_or_init(|| {
        PATTERN_SOURCES
            .iter()
            .map(|&source| Regex::new(source).unwrap())
            .collect()
    })
}
/// Reports whether `input` matches at least one of the patterns returned by
/// `get_dangerous_patterns`.
///
/// This is a pattern-based heuristic: a `false` result only means that none
/// of the listed patterns matched.
pub fn detect_xss_patterns(input: &str) -> bool {
    for pattern in get_dangerous_patterns() {
        if pattern.is_match(input) {
            return true;
        }
    }
    false
}
/// Makes `input` inert by passing it to `escape_html` and returning the result.
///
/// Despite the name, no tags are removed or allow-listed here: this function
/// only delegates to `escape_html`, so its output is whatever that function
/// produces for the same input.
pub fn sanitize_html(input: &str) -> String {
escape_html(input)
}
/// Allow-list check for URLs that are about to be placed in a link or redirect.
///
/// Returns `true` when `url`
///
/// * starts with `/`, `./` or `#` (compared case-sensitively, no trimming), or
/// * starts, ignoring case, with one of `http://`, `https://`, `mailto:`,
///   `ftp://` or `ftps://`.
///
/// Everything else is rejected, including the empty string, `../` paths and
/// any other scheme such as `javascript:` or `data:`. Because the check is a
/// plain prefix test, protocol-relative URLs (`//host/...`) are accepted by
/// the `/` rule, and a URL with leading whitespace is rejected.
pub fn is_safe_url(url: &str) -> bool {
    const RELATIVE_PREFIXES: [&str; 3] = ["/", "./", "#"];
    const SAFE_PROTOCOLS: [&str; 5] = ["http://", "https://", "mailto:", "ftp://", "ftps://"];

    if RELATIVE_PREFIXES
        .iter()
        .any(|&prefix| url.starts_with(prefix))
    {
        return true;
    }

    // Schemes are case-insensitive, so compare against a lowercased copy.
    let lowered = url.to_lowercase();
    for protocol in SAFE_PROTOCOLS {
        if lowered.starts_with(protocol) {
            return true;
        }
    }
    false
}
/// Removes everything that looks like an HTML tag or comment and returns the
/// remaining text.
///
/// The scan is deliberately conservative:
///
/// * Every `<` starts a tag. The tag ends at the next `>` that is not inside a
///   single- or double-quoted section, so `<a title="x>y">` is removed whole.
/// * `<!--` starts a comment that ends at the next `-->`.
/// * If a tag or comment is never closed, the rest of the input is dropped.
///
/// As a consequence the output never contains `<`. It may still contain `>`
/// and entity references; nothing is decoded or escaped.
pub fn strip_tags_safe(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut rest = html;

    while let Some(open) = rest.find('<') {
        // Everything before the `<` is plain text.
        text.push_str(&rest[..open]);
        let after_open = &rest[open + 1..];

        if let Some(comment_body) = after_open.strip_prefix("!--") {
            match comment_body.find("-->") {
                Some(close) => {
                    rest = &comment_body[close + "-->".len()..];
                    continue;
                }
                // Unterminated comment: discard the remainder of the input.
                None => return text,
            }
        }

        // Ordinary tag: find the first `>` outside any quoted section. If
        // there is none, the tag swallows the remainder of the input.
        let mut open_quote: Option<char> = None;
        let mut resume_at = after_open.len();
        for (offset, ch) in after_open.char_indices() {
            match (open_quote, ch) {
                (None, '"') | (None, '\'') => open_quote = Some(ch),
                (Some(quote), current) if current == quote => open_quote = None,
                (None, '>') => {
                    resume_at = offset + 1;
                    break;
                }
                _ => {}
            }
        }
        rest = &after_open[resume_at..];
    }

    text.push_str(rest);
    text
}
/// Deprecated alias kept for existing callers.
///
/// Forwards `input` to `escape_html` and returns its result unchanged; new
/// code should call `escape_html` directly.
#[deprecated(note = "use `escape_html` instead — identical implementation")]
pub fn escape_html_content(input: &str) -> String {
escape_html(input)
}
/// Escapes `input` so that it can be used as a single CSS identifier (for
/// example a class name or id inside a selector).
///
/// The rules follow the CSSOM "serialize an identifier" algorithm that backs
/// `CSS.escape()`:
///
/// * U+0000 becomes U+FFFD.
/// * U+0001..=U+001F and U+007F are written as a hex code-point escape
///   (`\1 `, `\7f `, ...), including the terminating space.
/// * A digit in first position, or in second position after a leading `-`,
///   is written as a code-point escape (`1abc` becomes `\31 abc`).
/// * A lone `-` becomes `\-`.
/// * ASCII letters, digits, `-`, `_` and all non-ASCII characters are copied.
/// * Every other ASCII character, including the space, is prefixed with a
///   backslash (`a.b` becomes `a\.b`, `a b` becomes `a\ b`).
///
/// An empty input yields an empty string.
pub fn escape_css_selector(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len() * 2);
    let starts_with_hyphen = input.starts_with('-');
    let is_lone_hyphen = input == "-";

    for (index, ch) in input.chars().enumerate() {
        match ch {
            '\0' => escaped.push('\u{FFFD}'),
            '\u{0001}'..='\u{001F}' | '\u{007F}' => {
                escaped.push_str(&format!("\\{:x} ", u32::from(ch)));
            }
            // An identifier may not start with a digit, nor with `-` + digit.
            '0'..='9' if index == 0 || (index == 1 && starts_with_hyphen) => {
                escaped.push_str(&format!("\\{:x} ", u32::from(ch)));
            }
            '-' if is_lone_hyphen => escaped.push_str("\\-"),
            _ if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' || !ch.is_ascii() => {
                escaped.push(ch);
            }
            // Remaining ASCII (punctuation and the space) is significant in
            // selector syntax and must be backslash-escaped.
            _ => {
                escaped.push('\\');
                escaped.push(ch);
            }
        }
    }
    escaped
}
/// Checks that `name` is usable as an HTML attribute name under a strict
/// ASCII allow-list.
///
/// Returns `true` only when `name` is non-empty and consists solely of ASCII
/// letters, ASCII digits, `-`, `_`, `.` and `:`. Whitespace, quotes, `=`,
/// `<`, `>`, `/` and any non-ASCII character make the name invalid.
pub fn validate_html_attr_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    // Every byte of a non-ASCII character is >= 0x80 and fails the check, so
    // testing bytes gives the same answer as testing characters.
    name.bytes()
        .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b':'))
}
/// Checks `input` against a strict allow-list for simple CSS names.
///
/// Returns `true` only when `input` is non-empty and every character is an
/// ASCII letter, an ASCII digit, `-` or `_`. Selector syntax such as `.`,
/// `#`, `[`, `{`, whitespace, and any non-ASCII character is rejected. The
/// position of a character is not considered, so a leading digit is accepted.
pub fn validate_css_selector(input: &str) -> bool {
    let is_allowed = |ch: char| ch.is_ascii_alphanumeric() || ch == '-' || ch == '_';
    !input.is_empty() && input.chars().all(is_allowed)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `text` still contains a character that HTML escaping is
    /// expected to replace (`&` is excluded because entities contain it).
    fn has_raw_markup(text: &str) -> bool {
        text.contains(|ch: char| matches!(ch, '<' | '>' | '"' | '\''))
    }

    #[test]
    fn test_escape_html() {
        let escaped = escape_html("<script>alert('xss')</script>");
        // The apostrophe may legitimately be written as `&#39;` or `&#x27;`,
        // so only the unambiguous parts are compared literally.
        assert!(escaped.starts_with("&lt;script&gt;alert("));
        assert!(escaped.ends_with(")&lt;/script&gt;"));
        assert!(!has_raw_markup(&escaped));
        assert_eq!(escape_html("a & b"), "a &amp; b");
    }

    #[test]
    fn test_escape_html_attr() {
        let attr = r#"value" onload="alert('xss')"#;
        let escaped = escape_html_attr(attr);
        assert!(escaped.contains("&quot;"));
        assert!(!has_raw_markup(&escaped));
    }

    #[test]
    fn test_escape_javascript() {
        let script = "'; alert('xss'); var x='";
        let escaped = escape_javascript(script);
        assert!(escaped.contains("\\'"));
        assert_eq!(escaped, "\\'; alert(\\'xss\\'); var x=\\'");
    }

    #[test]
    fn test_escape_url() {
        let url = "javascript:alert('xss')";
        let escaped = escape_url(url);
        assert!(escaped.contains("%3A"));
    }

    #[test]
    fn test_detect_xss_patterns() {
        assert!(detect_xss_patterns("<script>alert(1)</script>"));
        assert!(detect_xss_patterns(r#"<img src=x onerror="alert(1)">"#));
        assert!(detect_xss_patterns("javascript:alert(1)"));
        assert!(detect_xss_patterns("<iframe src='evil.com'>"));
        assert!(!detect_xss_patterns("Safe text"));
        assert!(!detect_xss_patterns("Normal <b>HTML</b>"));
    }

    #[test]
    fn test_is_safe_url() {
        assert!(is_safe_url("https://example.com"));
        assert!(is_safe_url("http://example.com"));
        assert!(is_safe_url("/path/to/page"));
        assert!(is_safe_url("./relative/path"));
        assert!(!is_safe_url("../parent/path"));
        assert!(is_safe_url("#section"));
        assert!(is_safe_url("mailto:user@example.com"));
        assert!(!is_safe_url("javascript:alert(1)"));
        assert!(!is_safe_url("data:text/html,<script>alert(1)</script>"));
        assert!(!is_safe_url("vbscript:alert(1)"));
    }

    #[test]
    fn test_sanitize_html() {
        let dangerous = "<script>alert('XSS')</script><b>Bold text</b>";
        let sanitized = sanitize_html(dangerous);
        assert!(sanitized.starts_with("&lt;script&gt;alert("));
        assert!(sanitized.ends_with(")&lt;/script&gt;&lt;b&gt;Bold text&lt;/b&gt;"));
        assert!(!has_raw_markup(&sanitized));
    }

    #[test]
    fn test_strip_tags_safe_basic() {
        assert_eq!(strip_tags_safe("<p>Hello <b>World</b></p>"), "Hello World");
        assert_eq!(strip_tags_safe("No tags here"), "No tags here");
        assert_eq!(strip_tags_safe(""), "");
    }

    #[test]
    fn test_strip_tags_safe_quoted_attributes() {
        assert_eq!(strip_tags_safe(r#"<a title="x>y">Link</a>"#), "Link");
        assert_eq!(strip_tags_safe("<a title='x>y'>Link</a>"), "Link");
        assert_eq!(
            strip_tags_safe(r#"<a title="a>b" href="c>d">Text</a>"#),
            "Text"
        );
    }

    #[test]
    fn test_strip_tags_safe_html_comments() {
        assert_eq!(strip_tags_safe("Hello<!-- comment -->World"), "HelloWorld");
        assert_eq!(strip_tags_safe("A<!-- multi\nline -->B"), "AB");
        assert_eq!(strip_tags_safe("Hello<!-- unclosed"), "Hello");
    }

    #[test]
    fn test_strip_tags_safe_malformed() {
        assert_eq!(strip_tags_safe("Hello<br"), "Hello");
        assert_eq!(strip_tags_safe("Hello<"), "Hello");
        assert_eq!(strip_tags_safe("Hello<br/>World"), "HelloWorld");
    }

    // The alias is deprecated on purpose; it is still tested until removal.
    #[test]
    #[allow(deprecated)]
    fn test_escape_html_content() {
        let escaped = escape_html_content("<script>alert('XSS')</script>");
        assert!(escaped.starts_with("&lt;script&gt;alert("));
        assert!(escaped.ends_with(")&lt;/script&gt;"));
        assert!(!has_raw_markup(&escaped));
        assert_eq!(escape_html_content("safe text"), "safe text");
    }

    #[test]
    fn test_escape_css_selector_basic() {
        assert_eq!(escape_css_selector("my-class"), "my-class");
        assert_eq!(escape_css_selector("item_123"), "item_123");
        assert_eq!(escape_css_selector(""), "");
    }

    #[test]
    fn test_escape_css_selector_metacharacters() {
        assert_eq!(escape_css_selector("a.b"), r"a\.b");
        assert_eq!(escape_css_selector("a#b"), r"a\#b");
        assert_eq!(escape_css_selector("a[0]"), r"a\[0\]");
        assert_eq!(escape_css_selector("a{b}"), r"a\{b\}");
        assert_eq!(escape_css_selector("a:hover"), r"a\:hover");
    }

    #[test]
    fn test_escape_css_selector_first_char() {
        assert_eq!(escape_css_selector("-"), r"\-");
        assert_eq!(escape_css_selector("1abc"), r"\31 abc");
    }

    #[test]
    fn test_escape_css_selector_null_and_control() {
        assert_eq!(escape_css_selector("\0"), "\u{FFFD}");
        assert_eq!(escape_css_selector("\u{0001}"), r"\1 ");
    }

    #[test]
    fn test_validate_css_selector() {
        assert!(validate_css_selector("my-class"));
        assert!(validate_css_selector("item_123"));
        assert!(validate_css_selector("CamelCase"));
        assert!(!validate_css_selector(""));
        assert!(!validate_css_selector("a.b"));
        assert!(!validate_css_selector("a[0]"));
        assert!(!validate_css_selector("a{b}"));
        assert!(!validate_css_selector("a b"));
    }

    #[test]
    fn test_validate_html_attr_name_valid() {
        assert!(validate_html_attr_name("class"));
        assert!(validate_html_attr_name("data-value"));
        assert!(validate_html_attr_name("aria-label"));
        assert!(validate_html_attr_name("xml:lang"));
        assert!(validate_html_attr_name("id"));
        assert!(validate_html_attr_name("data-my.attr"));
        assert!(validate_html_attr_name("A1"));
    }

    #[test]
    fn test_validate_html_attr_name_invalid() {
        assert!(!validate_html_attr_name(""));
        assert!(!validate_html_attr_name("foo bar"));
        assert!(!validate_html_attr_name("onclick=alert(1)"));
        assert!(!validate_html_attr_name("a\"b"));
        assert!(!validate_html_attr_name("a'b"));
        assert!(!validate_html_attr_name("a<b"));
        assert!(!validate_html_attr_name("a>b"));
        assert!(!validate_html_attr_name("a/b"));
    }
}