use std::borrow::Cow;
/// Escapes the five HTML metacharacters (`&`, `<`, `>`, `"`, `'`) in `s`.
///
/// Returns `Cow::Borrowed(s)` without allocating when `s` contains none of
/// those characters; otherwise returns a freshly built `Cow::Owned` string in
/// which each metacharacter is replaced by its entity:
///
/// | input | output   |
/// |-------|----------|
/// | `&`   | `&amp;`  |
/// | `<`   | `&lt;`   |
/// | `>`   | `&gt;`   |
/// | `"`   | `&quot;` |
/// | `'`   | `&#39;`  |
///
/// The function is deliberately not idempotent: an `&` that is already part
/// of an entity is escaped again.
pub(crate) fn escape_html_str(s: &str) -> Cow<'_, str> {
    // Fast path: all metacharacters are ASCII, so a byte scan is sufficient
    // and never misfires on multi-byte UTF-8 sequences.
    if !s
        .bytes()
        .any(|b| matches!(b, b'&' | b'<' | b'>' | b'"' | b'\''))
    {
        return Cow::Borrowed(s);
    }
    // Entities are longer than the characters they replace; leave some slack
    // so short inputs usually avoid a reallocation.
    let mut out = String::with_capacity(s.len() + 16);
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            c => out.push(c),
        }
    }
    Cow::Owned(out)
}
/// Uppercase hexadecimal digits as ASCII bytes, indexed by nibble value (0..=15).
const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";
/// Returns `true` when `b` is an ASCII letter, an ASCII digit, or one of
/// `-`, `.`, `_`, `~`.
#[inline]
fn is_unreserved(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
    )
}
/// Returns `true` when `b` may appear unencoded inside a single path segment:
/// any unreserved byte, or one of `! $ & ' ( ) * + , ; = : @`.
///
/// `/` is not in the set, so it is percent-encoded in segment mode.
#[inline]
fn keep_segment(b: u8) -> bool {
    // Extra bytes allowed in a segment on top of the unreserved set.
    const SEGMENT_EXTRAS: &[u8] = b"!$&'()*+,;=:@";
    is_unreserved(b) || SEGMENT_EXTRAS.contains(&b)
}
/// Returns `true` when `b` may appear unencoded in a whole path: the `/`
/// separator, or anything that `keep_segment` accepts.
#[inline]
fn keep_path(b: u8) -> bool {
    matches!(b, b'/') || keep_segment(b)
}
/// Returns `true` when `b` may appear unencoded in a query or form value.
///
/// Only bytes accepted by `is_unreserved` are kept; everything else,
/// including delimiters such as `&`, `=` and `+`, is percent-encoded.
#[inline]
fn keep_value(b: u8) -> bool {
is_unreserved(b)
}
/// Appends the percent-encoded form of `text` to `out`.
///
/// The input is processed byte by byte over its UTF-8 representation:
///
/// * a space becomes `+` when `space_to_plus` is set (checked before `keep`);
/// * a byte for which `keep` returns `true` is copied through unchanged;
/// * every other byte is written as `%XX` using uppercase hex digits.
fn percent_encode_into(text: &str, keep: fn(u8) -> bool, space_to_plus: bool, out: &mut String) {
    for byte in text.bytes() {
        match byte {
            b' ' if space_to_plus => out.push('+'),
            _ if keep(byte) => out.push(char::from(byte)),
            _ => {
                // Split the byte into its high and low nibbles for the two hex digits.
                let high = UPPER_HEX[usize::from(byte >> 4)];
                let low = UPPER_HEX[usize::from(byte & 0x0f)];
                out.push('%');
                out.push(char::from(high));
                out.push(char::from(low));
            }
        }
    }
}
/// Percent-encodes `text` according to the rules for the named URL `component`.
///
/// Recognised component names:
///
/// * `"path"` – keeps segment characters and `/`;
/// * `"segment"` – keeps segment characters, encodes `/`;
/// * `"query"` – keeps only unreserved characters;
/// * `"form"` – like `"query"`, but a space is written as `+`.
///
/// Returns `None` for any other component name.
pub(crate) fn percent_encode_str(text: &str, component: &str) -> Option<String> {
    let keep: fn(u8) -> bool = match component {
        "path" => keep_path,
        "segment" => keep_segment,
        "query" | "form" => keep_value,
        _ => return None,
    };
    // Only form encoding substitutes `+` for a space.
    let space_to_plus = component == "form";

    let mut encoded = String::with_capacity(text.len());
    percent_encode_into(text, keep, space_to_plus, &mut encoded);
    Some(encoded)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Each of the five metacharacters is replaced by its entity.
    #[test]
    fn escape_html_metacharacters() {
        assert_eq!(
            escape_html_str("<script>alert(1)</script>"),
            "&lt;script&gt;alert(1)&lt;/script&gt;"
        );
        assert_eq!(escape_html_str("a & b"), "a &amp; b");
        assert_eq!(escape_html_str("say \"hi\""), "say &quot;hi&quot;");
        assert_eq!(escape_html_str("it's"), "it&#39;s");
    }

    /// Input without metacharacters is returned as a borrow, even when it
    /// contains non-ASCII text.
    #[test]
    fn escape_html_passthrough_borrows() {
        assert!(matches!(
            escape_html_str("café 北京"),
            Cow::Borrowed("café 北京")
        ));
        assert_eq!(escape_html_str("<"), "&lt;");
    }

    /// Escaping already-escaped text escapes the ampersand again.
    #[test]
    fn escape_html_not_idempotent_by_design() {
        assert_eq!(escape_html_str("&"), "&amp;");
        assert_eq!(escape_html_str("&amp;"), "&amp;amp;");
    }

    #[test]
    fn percent_encode_unreserved_untouched() {
        assert_eq!(
            percent_encode_str("AZaz09-._~", "query").unwrap(),
            "AZaz09-._~"
        );
    }

    #[test]
    fn percent_encode_query_encodes_reserved() {
        assert_eq!(
            percent_encode_str("a&b=c+d", "query").unwrap(),
            "a%26b%3Dc%2Bd"
        );
    }

    #[test]
    fn percent_encode_form_space_to_plus() {
        assert_eq!(percent_encode_str("a b+c", "form").unwrap(), "a+b%2Bc");
    }

    #[test]
    fn percent_encode_utf8_bytes() {
        assert_eq!(percent_encode_str("é", "query").unwrap(), "%C3%A9");
    }

    #[test]
    fn percent_encode_segment_vs_path() {
        assert_eq!(percent_encode_str("a/b", "segment").unwrap(), "a%2Fb");
        assert_eq!(percent_encode_str("a/b", "path").unwrap(), "a/b");
    }

    #[test]
    fn percent_encode_output_is_ascii() {
        assert!(percent_encode_str("Москва ☕", "form").unwrap().is_ascii());
    }

    #[test]
    fn percent_encode_unknown_component_is_none() {
        assert!(percent_encode_str("x", "nonsense").is_none());
    }
}