use memchr::{memchr, memchr2, memchr3};
/// Bytes that are rewritten when escaping HTML text content: `<`, `>`, `&`, `"`.
///
/// Must list the same bytes as `TEXT_ESCAPE_TABLE`.
// Not referenced by the code visible in this file; kept as a readable statement of the set.
#[allow(dead_code)]
const TEXT_ESCAPE_CHARS: &[u8] = b"<>&\"";
/// Bytes that are rewritten when escaping HTML attribute values: `<`, `>`, `&`, `"`, `'`.
///
/// Must list the same bytes as `ATTR_ESCAPE_TABLE`.
// Not referenced by the code visible in this file; kept as a readable statement of the set.
#[allow(dead_code)]
const ATTR_ESCAPE_CHARS: &[u8] = b"<>&\"'";
/// Byte-indexed lookup table: an entry is `true` exactly for the bytes
/// `<`, `>`, `&` and `"`.
const TEXT_ESCAPE_TABLE: [bool; 256] = {
    let mut lut = [false; 256];
    let mut byte = 0usize;
    // `for` is not available in const evaluation, hence the manual counter.
    while byte < 256 {
        lut[byte] = matches!(byte as u8, b'<' | b'>' | b'&' | b'"');
        byte += 1;
    }
    lut
};
/// Byte-indexed lookup table: an entry is `true` exactly for the bytes
/// `<`, `>`, `&`, `"` and `'`.
const ATTR_ESCAPE_TABLE: [bool; 256] = {
    let mut lut = [false; 256];
    let mut byte = 0usize;
    // `for` is not available in const evaluation, hence the manual counter.
    while byte < 256 {
        lut[byte] = matches!(byte as u8, b'<' | b'>' | b'&' | b'"' | b'\'');
        byte += 1;
    }
    lut
};
/// Appends `input` to `out`, rewriting every byte that `first_text_escape`
/// reports with the replacement produced by `push_text_escape`.
///
/// Bytes between two hits are copied through in bulk. Empty input appends nothing.
#[inline]
pub fn escape_text_into(out: &mut Vec<u8>, input: &[u8]) {
    let mut rest = input;
    while let Some(hit) = first_text_escape(rest) {
        // `plain` is the untouched run before the hit; `tail[0]` is the byte to rewrite.
        let (plain, tail) = rest.split_at(hit);
        out.extend_from_slice(plain);
        push_text_escape(out, tail[0]);
        rest = &tail[1..];
    }
    // Whatever follows the last hit (or the whole input when there was none).
    out.extend_from_slice(rest);
}
/// Appends `input` to `out`, rewriting every byte that `first_attr_escape`
/// reports with the replacement produced by `push_attr_escape`.
///
/// Bytes between two hits are copied through in bulk. Empty input appends nothing.
#[inline]
pub fn escape_full_into(out: &mut Vec<u8>, input: &[u8]) {
    let mut rest = input;
    while let Some(hit) = first_attr_escape(rest) {
        // `plain` is the untouched run before the hit; `tail[0]` is the byte to rewrite.
        let (plain, tail) = rest.split_at(hit);
        out.extend_from_slice(plain);
        push_attr_escape(out, tail[0]);
        rest = &tail[1..];
    }
    // Whatever follows the last hit (or the whole input when there was none).
    out.extend_from_slice(rest);
}
/// Escapes `input` for use inside an HTML attribute value and appends it to `out`.
///
/// This is an alias: it forwards directly to `escape_full_into`.
#[inline]
pub fn escape_attr_into(out: &mut Vec<u8>, input: &[u8]) {
escape_full_into(out, input)
}
/// Returns `true` if `input` contains at least one byte flagged in
/// `TEXT_ESCAPE_TABLE`; `false` for empty input.
#[inline]
pub fn needs_text_escape(input: &[u8]) -> bool {
    input
        .iter()
        .map(|&byte| usize::from(byte))
        .any(|idx| TEXT_ESCAPE_TABLE[idx])
}
/// Returns `true` if `input` contains at least one byte flagged in
/// `ATTR_ESCAPE_TABLE`; `false` for empty input.
#[inline]
pub fn needs_attr_escape(input: &[u8]) -> bool {
    input
        .iter()
        .map(|&byte| usize::from(byte))
        .any(|idx| ATTR_ESCAPE_TABLE[idx])
}
/// Returns the index of the first `<`, `>`, `&` or `"` in `input`, or `None`
/// if none of them occurs.
#[inline]
fn first_text_escape(input: &[u8]) -> Option<usize> {
    let markup = memchr3(b'<', b'>', b'&', input);
    // A quote only matters if it precedes the first markup byte, so restrict the
    // second scan to that prefix. Scanning the whole remainder on every call made
    // the escaping loop quadratic on inputs such as a long run of `<` with no `"`.
    let window = &input[..markup.unwrap_or(input.len())];
    min_opt(markup, memchr(b'"', window))
}
/// Returns the index of the first `<`, `>`, `&`, `"` or `'` in `input`, or
/// `None` if none of them occurs.
#[inline]
fn first_attr_escape(input: &[u8]) -> Option<usize> {
    let markup = memchr3(b'<', b'>', b'&', input);
    // A quote only matters if it precedes the first markup byte, so restrict the
    // second scan to that prefix. Scanning the whole remainder on every call made
    // the escaping loop quadratic on inputs such as a long run of `<` with no quotes.
    let window = &input[..markup.unwrap_or(input.len())];
    min_opt(markup, memchr2(b'"', b'\'', window))
}
/// Returns the smaller of two optional indices.
///
/// When only one side is `Some`, that side is returned; when both are `None`,
/// the result is `None`.
#[inline]
fn min_opt(a: Option<usize>, b: Option<usize>) -> Option<usize> {
    match a.zip(b) {
        Some((left, right)) => Some(left.min(right)),
        // At most one side is present here, so `or` picks it (or yields `None`).
        None => a.or(b),
    }
}
/// Returns a freshly allocated, text-escaped copy of `input`.
///
/// The buffer is pre-sized to the input length plus one eighth, leaving some
/// room for replacements that are longer than the byte they replace.
pub fn escape_text(input: &[u8]) -> Vec<u8> {
    let headroom = input.len() / 8;
    let mut escaped = Vec::with_capacity(input.len() + headroom);
    escape_text_into(&mut escaped, input);
    escaped
}
/// Text-escapes `input` and returns the result as an owned `String`.
///
/// Convenience wrapper around `escape_text` for callers that work with `&str`.
pub fn escape_text_to_string(input: &str) -> String {
let escaped = escape_text(input.as_bytes());
// SAFETY: `input` is valid UTF-8. Escaping copies its bytes through unchanged
// except that individual ASCII bytes are replaced by ASCII-only sequences.
// ASCII bytes never occur inside a multi-byte UTF-8 sequence, so the result
// is still valid UTF-8.
unsafe { String::from_utf8_unchecked(escaped) }
}
/// Returns `true` if `b` is one of the 32 ASCII punctuation bytes
/// (``!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~``).
///
/// Delegates to the standard library, whose definition covers exactly this set.
#[inline]
fn is_ascii_punctuation(b: u8) -> bool {
    b.is_ascii_punctuation()
}
/// Escapes a link destination and appends it to `out`.
///
/// If `input` contains `&`, HTML entity references are decoded first and the
/// decoded text is then passed to `url_escape_link_destination_raw`. Without
/// `&` there is nothing to decode, and the bytes are escaped directly.
///
/// Input that is not valid UTF-8 cannot be entity-decoded; it is escaped as-is
/// rather than being discarded.
#[inline]
pub fn url_escape_link_destination(out: &mut Vec<u8>, input: &[u8]) {
    // No `&` means no entity reference: skip the UTF-8 check and the decode.
    if memchr(b'&', input).is_none() {
        url_escape_link_destination_raw(out, input);
        return;
    }
    match core::str::from_utf8(input) {
        Ok(text) => {
            let decoded = html_escape::decode_html_entities(text);
            url_escape_link_destination_raw(out, decoded.as_bytes());
        }
        // Previously this case produced an empty destination; keep the bytes instead.
        Err(_) => url_escape_link_destination_raw(out, input),
    }
}
/// Appends the text-content escape for a single byte to `out`.
///
/// `<`, `>`, `&` and `"` become `&lt;`, `&gt;`, `&amp;` and `&quot;`;
/// every other byte is appended unchanged.
#[inline]
fn push_text_escape(out: &mut Vec<u8>, b: u8) {
    match b {
        b'<' => out.extend_from_slice(b"&lt;"),
        b'>' => out.extend_from_slice(b"&gt;"),
        b'&' => out.extend_from_slice(b"&amp;"),
        b'"' => out.extend_from_slice(b"&quot;"),
        _ => out.push(b),
    }
}
/// Appends the attribute-value escape for a single byte to `out`.
///
/// `<`, `>`, `&`, `"` and `'` become `&lt;`, `&gt;`, `&amp;`, `&quot;` and
/// `&#39;`; every other byte is appended unchanged.
#[inline]
fn push_attr_escape(out: &mut Vec<u8>, b: u8) {
    match b {
        b'<' => out.extend_from_slice(b"&lt;"),
        b'>' => out.extend_from_slice(b"&gt;"),
        b'&' => out.extend_from_slice(b"&amp;"),
        b'"' => out.extend_from_slice(b"&quot;"),
        b'\'' => out.extend_from_slice(b"&#39;"),
        _ => out.push(b),
    }
}
/// Escapes an (already entity-decoded) link destination and appends it to `out`.
///
/// * A backslash followed by ASCII punctuation is a backslash escape: the
///   backslash is dropped and the punctuation byte is emitted, HTML-escaped
///   if it is `<`, `>`, `&` or `'`, and as `%22` if it is `"`.
/// * Any other backslash becomes `%5C`, a space `%20`, a double quote `%22`.
/// * `<`, `>`, `&`, `'` become `&lt;`, `&gt;`, `&amp;`, `&#39;`.
/// * Control bytes (except tab, LF, CR), DEL and all non-ASCII bytes are
///   percent-encoded with upper-case hex digits.
/// * Everything else is copied through unchanged.
#[inline]
fn url_escape_link_destination_raw(out: &mut Vec<u8>, input: &[u8]) {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";
    // Fast path: nothing in the input needs rewriting, so copy it wholesale.
    if input.is_ascii()
        && memchr2(b'\\', b' ', input).is_none()
        && memchr3(b'"', b'<', b'>', input).is_none()
        && memchr2(b'&', b'\'', input).is_none()
        && !input
            .iter()
            .any(|&b| matches!(b, 0x00..=0x08 | 0x0B | 0x0C | 0x0E..=0x1F | 0x7F))
    {
        out.extend_from_slice(input);
        return;
    }
    let mut pos = 0;
    while pos < input.len() {
        let b = input[pos];
        if b == b'\\' && pos + 1 < input.len() && is_ascii_punctuation(input[pos + 1]) {
            // Backslash escape: consume both bytes, emit only the escaped one.
            match input[pos + 1] {
                b'<' => out.extend_from_slice(b"&lt;"),
                b'>' => out.extend_from_slice(b"&gt;"),
                b'&' => out.extend_from_slice(b"&amp;"),
                b'"' => out.extend_from_slice(b"%22"),
                b'\'' => out.extend_from_slice(b"&#39;"),
                other => out.push(other),
            }
            pos += 2;
            continue;
        }
        match b {
            b'\\' => out.extend_from_slice(b"%5C"),
            b' ' => out.extend_from_slice(b"%20"),
            b'"' => out.extend_from_slice(b"%22"),
            b'<' => out.extend_from_slice(b"&lt;"),
            b'>' => out.extend_from_slice(b"&gt;"),
            b'&' => out.extend_from_slice(b"&amp;"),
            b'\'' => out.extend_from_slice(b"&#39;"),
            // Control bytes other than \t, \n, \r, plus DEL and every non-ASCII byte.
            0x00..=0x08 | 0x0B | 0x0C | 0x0E..=0x1F | 0x7F..=0xFF => {
                out.push(b'%');
                out.push(HEX[(b >> 4) as usize]);
                out.push(HEX[(b & 0xF) as usize]);
            }
            _ => out.push(b),
        }
        pos += 1;
    }
}
/// Percent-encodes URL-unsafe bytes and HTML-escapes markup bytes in one pass,
/// appending the result to `out`.
///
/// * `\`, `[`, `]`, `` ` `` and space become `%5C`, `%5B`, `%5D`, `%60`, `%20`.
/// * `<`, `>`, `&`, `"`, `'` become `&lt;`, `&gt;`, `&amp;`, `&quot;`, `&#39;`.
/// * Control bytes (except tab, LF, CR) and all non-ASCII bytes are
///   percent-encoded with upper-case hex digits.
/// * Everything else is copied through unchanged.
#[inline]
pub fn url_encode_then_html_escape(out: &mut Vec<u8>, input: &[u8]) {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";
    for &b in input {
        match b {
            b'\\' => out.extend_from_slice(b"%5C"),
            b'[' => out.extend_from_slice(b"%5B"),
            b']' => out.extend_from_slice(b"%5D"),
            b'`' => out.extend_from_slice(b"%60"),
            b' ' => out.extend_from_slice(b"%20"),
            b'<' => out.extend_from_slice(b"&lt;"),
            b'>' => out.extend_from_slice(b"&gt;"),
            b'&' => out.extend_from_slice(b"&amp;"),
            b'"' => out.extend_from_slice(b"&quot;"),
            b'\'' => out.extend_from_slice(b"&#39;"),
            // NOTE(review): unlike `url_escape_link_destination_raw`, DEL (0x7F) is
            // passed through unchanged here — confirm that this is intentional.
            0x00..=0x08 | 0x0B | 0x0C | 0x0E..=0x1F | 0x80..=0xFF => {
                out.push(b'%');
                out.push(HEX[(b >> 4) as usize]);
                out.push(HEX[(b & 0xF) as usize]);
            }
            _ => out.push(b),
        }
    }
}
/// Unit tests for the escaping entry points.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_escape_text_basic() {
        let mut out = Vec::new();
        escape_text_into(&mut out, b"Hello, World!");
        assert_eq!(out, b"Hello, World!");
    }

    #[test]
    fn test_escape_text_lt() {
        let mut out = Vec::new();
        escape_text_into(&mut out, b"<script>");
        assert_eq!(out, b"&lt;script&gt;");
    }

    #[test]
    fn test_escape_text_gt() {
        let mut out = Vec::new();
        escape_text_into(&mut out, b"1 > 0");
        assert_eq!(out, b"1 &gt; 0");
    }

    #[test]
    fn test_escape_text_amp() {
        let mut out = Vec::new();
        escape_text_into(&mut out, b"a & b");
        assert_eq!(out, b"a &amp; b");
    }

    #[test]
    fn test_escape_text_mixed() {
        let mut out = Vec::new();
        escape_text_into(&mut out, b"<a href=\"test\">link & stuff</a>");
        assert_eq!(
            out,
            b"&lt;a href=&quot;test&quot;&gt;link &amp; stuff&lt;/a&gt;"
        );
    }

    #[test]
    fn test_escape_text_empty() {
        let mut out = Vec::new();
        escape_text_into(&mut out, b"");
        assert_eq!(out, b"");
    }

    #[test]
    fn test_escape_attr_quotes() {
        let mut out = Vec::new();
        escape_full_into(&mut out, b"\"hello\"");
        assert_eq!(out, b"&quot;hello&quot;");
    }

    #[test]
    fn test_escape_attr_single_quote() {
        let mut out = Vec::new();
        escape_full_into(&mut out, b"it's");
        assert_eq!(out, b"it&#39;s");
    }

    #[test]
    fn test_escape_attr_all() {
        let mut out = Vec::new();
        escape_full_into(&mut out, b"<>&\"'");
        assert_eq!(out, b"&lt;&gt;&amp;&quot;&#39;");
    }

    #[test]
    fn test_needs_escape() {
        assert!(!needs_text_escape(b"hello"));
        assert!(needs_text_escape(b"<hello>"));
        assert!(needs_text_escape(b"a & b"));
        assert!(!needs_text_escape(b""));
    }

    #[test]
    fn test_escape_consecutive() {
        let mut out = Vec::new();
        escape_text_into(&mut out, b"<<<");
        assert_eq!(out, b"&lt;&lt;&lt;");
    }

    #[test]
    fn test_escape_at_boundaries() {
        let mut out = Vec::new();
        escape_text_into(&mut out, b"<");
        assert_eq!(out, b"&lt;");
        out.clear();
        escape_text_into(&mut out, b"hello<");
        assert_eq!(out, b"hello&lt;");
        out.clear();
        escape_text_into(&mut out, b"<hello");
        assert_eq!(out, b"&lt;hello");
    }

    #[test]
    fn test_escape_to_string() {
        let result = escape_text_to_string("<script>");
        assert_eq!(result, "&lt;script&gt;");
    }

    #[test]
    fn test_escape_unicode() {
        // Multi-byte UTF-8 sequences must pass through untouched.
        let mut out = Vec::new();
        escape_text_into(&mut out, "Grüße, Welt! <tag>".as_bytes());
        assert_eq!(out, "Grüße, Welt! &lt;tag&gt;".as_bytes());
    }
}