/// Escapes `s` so it can be placed in HTML text content.
///
/// The characters `&`, `<`, `>` and `"` are replaced by the entity references
/// `&amp;`, `&lt;`, `&gt;` and `&quot;`. A NUL character is replaced by
/// U+FFFD REPLACEMENT CHARACTER. Every other character is copied unchanged.
///
/// Returns a newly allocated `String`; the input is never modified.
pub fn escape_text(s: &str) -> String {
    // The escaped form is at least as long as the input, so this avoids the
    // first few reallocations without over-committing.
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            // `&` has to be escaped as well, otherwise input such as `&lt;`
            // would be read back as `<` by an HTML parser.
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\0' => out.push('\u{FFFD}'),
            _ => out.push(ch),
        }
    }
    out
}
/// Returns `true` when `c` is a C0 control character (U+0000 through U+001F)
/// or DEL (U+007F).
fn is_url_control_char(c: char) -> bool {
    matches!(c, '\u{0}'..='\u{1f}' | '\u{7f}')
}
/// Returns `true` if at least one character of `url` satisfies
/// `is_url_control_char`.
fn has_control_char(url: &str) -> bool {
    // A `fn(char) -> bool` is a valid string pattern, so no explicit loop is needed.
    url.contains(is_url_control_char)
}
/// Returns a copy of `url` with every character for which
/// `is_url_control_char` is true removed. Remaining characters keep their order.
fn strip_control_chars(url: &str) -> String {
    let mut kept = url.to_owned();
    kept.retain(|ch| !is_url_control_char(ch));
    kept
}
/// Decides whether `url` may be emitted as a link target.
///
/// Control characters are removed and leading spaces are skipped before the
/// check, so they cannot be used to disguise a scheme. The result is:
///
/// * `true` when no `:` occurs before the first `/`, `?` or `#` (or there is
///   no `:` at all) — the URL has no scheme and is treated as relative;
/// * `true` when the text before the first `:` is empty or is not a
///   syntactically valid scheme (ASCII letter followed by ASCII letters,
///   digits, `+`, `.` or `-`);
/// * otherwise `true` only if the scheme, compared case-insensitively, is one
///   of `http`, `https`, `mailto` or `tel`.
pub fn is_safe_url(url: &str) -> bool {
    const ALLOWED_SCHEMES: [&str; 4] = ["http", "https", "mailto", "tel"];

    let cleaned = strip_control_chars(url);
    let candidate = cleaned.trim_start_matches([' ', '\t', '\n', '\r']);

    // Whichever of these delimiters comes first decides how the URL is read.
    let Some(delimiter_at) = candidate.find(['/', '?', '#', ':']) else {
        return true;
    };
    let (scheme, rest) = candidate.split_at(delimiter_at);
    if !rest.starts_with(':') {
        // A path, query or fragment begins before any colon: no scheme present.
        return true;
    }

    // An empty or malformed prefix is not a scheme, so the colon is just data.
    let mut scheme_bytes = scheme.bytes();
    let well_formed = match scheme_bytes.next() {
        Some(first) if first.is_ascii_alphabetic() => {
            scheme_bytes.all(|b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'.' | b'-'))
        }
        _ => false,
    };
    if !well_formed {
        return true;
    }

    ALLOWED_SCHEMES
        .iter()
        .any(|allowed| scheme.eq_ignore_ascii_case(allowed))
}
/// Produces a link target that is safe to emit for `url`.
///
/// Returns `"#"` when `is_safe_url` rejects the URL. Otherwise returns the URL
/// itself, with control characters removed if it contained any.
pub fn sanitize_url(url: &str) -> String {
    if !is_safe_url(url) {
        return String::from("#");
    }
    if has_control_char(url) {
        strip_control_chars(url)
    } else {
        url.to_owned()
    }
}
/// Percent-encodes the bytes of `s` that are not allowed to appear literally.
///
/// * ASCII letters and digits and the bytes ``-_.~!$'()*,;=+:@/?#&`` are
///   copied unchanged.
/// * A `%` followed by two ASCII hex digits is treated as an existing escape
///   and copied unchanged (hex digit case is preserved).
/// * Every other byte — including a stray `%` and each byte of a multi-byte
///   UTF-8 sequence — is written as `%XX` with upper-case hex digits.
pub fn escape_url(s: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    /// Bytes that are emitted as-is.
    fn passes_through(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || b"-_.~!$'()*,;=+:@/?#&".contains(&byte)
    }

    let src = s.as_bytes();
    let mut encoded = String::with_capacity(src.len());
    let mut pos = 0;
    while pos < src.len() {
        let byte = src[pos];

        // Look ahead for the two hex digits of an already-encoded triplet so
        // that it is not encoded a second time.
        let existing_escape = match src.get(pos + 1..pos + 3) {
            Some(&[hi, lo])
                if byte == b'%' && hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() =>
            {
                Some((hi, lo))
            }
            _ => None,
        };

        if let Some((hi, lo)) = existing_escape {
            encoded.push('%');
            encoded.push(char::from(hi));
            encoded.push(char::from(lo));
            pos += 3;
            continue;
        }

        if passes_through(byte) {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0f)]));
        }
        pos += 1;
    }
    encoded
}
/// Escapes `s` so it can be placed inside a double-quoted HTML attribute value.
///
/// The characters `&`, `"`, `<` and `>` are replaced by the entity references
/// `&amp;`, `&quot;`, `&lt;` and `&gt;`. A NUL character is replaced by
/// U+FFFD REPLACEMENT CHARACTER. Every other character is copied unchanged;
/// in particular `'` is NOT escaped, so the result is not suitable for a
/// single-quoted attribute.
///
/// Returns a newly allocated `String`; the input is never modified.
pub fn escape_attr(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            // Escaping `&` keeps existing entity-like text (e.g. `&quot;`)
            // from being decoded into the character it names.
            '&' => out.push_str("&amp;"),
            // An unescaped `"` would terminate the attribute value early.
            '"' => out.push_str("&quot;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '\0' => out.push('\u{FFFD}'),
            _ => out.push(ch),
        }
    }
    out
}
#[cfg(test)]
mod url_safety_tests {
    use super::{is_safe_url, sanitize_url};

    /// Schemes outside the allow-list are refused regardless of letter case or
    /// a leading space, and are replaced by `#`.
    #[test]
    fn rejects_dangerous_schemes() {
        let hostile = [
            "javascript:alert(1)",
            "JavaScript:alert(1)",
            "JAVASCRIPT:alert(1)",
            " javascript:alert(1)",
            "data:text/html,<script>x</script>",
            "vbscript:msgbox(1)",
            "file:///etc/passwd",
        ];
        for u in hostile {
            assert!(!is_safe_url(u), "should reject {u:?}");
            assert_eq!(sanitize_url(u), "#");
        }
    }

    /// Control characters placed before, inside or after the scheme name must
    /// not hide a forbidden scheme from the check.
    #[test]
    fn rejects_control_char_scheme_bypass() {
        let disguised = [
            "java\tscript:alert(1)",
            "java\nscript:alert(1)",
            "java\rscript:alert(1)",
            "\u{0}javascript:alert(1)",
            "java\u{0}script:alert(1)",
            "jav\u{1}ascript:alert(1)",
            "javascript\t:alert(1)",
            "\tjavascript:alert(1)",
        ];
        for u in disguised {
            assert!(!is_safe_url(u), "should reject {u:?}");
            assert_eq!(sanitize_url(u), "#", "should fall back to # for {u:?}");
        }
    }

    /// A URL that is acceptable apart from embedded control characters is kept,
    /// minus those characters.
    #[test]
    fn strips_control_chars_from_otherwise_safe_url() {
        let cases = [
            ("https://exa\tmple.com", "https://example.com"),
            ("/re\nl/path", "/rel/path"),
        ];
        for (input, expected) in cases {
            assert_eq!(sanitize_url(input), expected);
        }
    }

    /// Removing control characters must leave multi-byte UTF-8 text intact.
    #[test]
    fn strip_preserves_multibyte_utf8() {
        let absolute = "https://exämple.com/\tpath";
        assert_eq!(sanitize_url(absolute), "https://exämple.com/path");
        assert!(is_safe_url(absolute));
        assert_eq!(sanitize_url("/café\n/menu"), "/café/menu");
    }

    /// Allow-listed schemes and scheme-less (relative) references pass through
    /// unchanged, including ones with a `:` after the first `/`, `?` or `#`.
    #[test]
    fn allows_safe_schemes_and_relative() {
        let acceptable = [
            "https://example.com/x",
            "http://example.com",
            "HTTPS://EXAMPLE.COM",
            "mailto:a@b.com",
            "tel:+15551234",
            "/abs/path",
            "./rel/path",
            "../up/path",
            "foo/bar",
            "#fragment",
            "?q=a:b",
            "./weird:name",
            "page#a:b",
        ];
        for u in acceptable {
            assert!(is_safe_url(u), "should allow {u:?}");
            assert_eq!(sanitize_url(u), u);
        }
    }
}