use pastey::paste;
use std::borrow::Cow;
// Expands to an expression of type `Option<usize>`: the index of the first
// byte in `$slice` that equals any of the given byte literals, or `None`.
//
// One, two and three needles are forwarded to `memchr::memchr`,
// `memchr::memchr2` and `memchr::memchr3` respectively. Four or more needles
// fall back to a plain iterator scan over the slice.
//
// Every arm accepts an optional trailing comma after the last needle.
macro_rules! find_u8_body {
    // Exactly one needle.
    ($slice:expr, $ch1:literal $(,)?) => {
        memchr::memchr($ch1, $slice)
    };
    // Exactly two needles.
    ($slice:expr, $ch1:literal, $ch2:literal $(,)?) => {
        memchr::memchr2($ch1, $ch2, $slice)
    };
    // Exactly three needles.
    ($slice:expr, $ch1:literal, $ch2:literal, $ch3:literal $(,)?) => {
        memchr::memchr3($ch1, $ch2, $ch3, $slice)
    };
    // Any other needle count (reached only when the arms above did not match).
    ($slice:expr, $($ch:literal),+ $(,)?) => {
        $slice.iter().position(|byte| matches!(byte, $($ch)|+))
    };
}
// Generates a family of escaping functions from a table of
// `byte literal => replacement byte string` pairs.
//
// Invocation shape:
//
//     escape_fn! {
//         <attributes> <visibility> fn <str_fn>;
//         <attributes> <visibility> fn <bytes_fn>;
//         {
//             b'x' => b"replacement",
//             ...
//         }
//     }
//
// Three items are emitted:
//
// * `<str_fn>`: takes anything convertible into `Cow<str>` and returns a
//   `Cow<str>`. The input is handed back untouched when no table byte occurs.
// * `<bytes_fn>`: the same contract over `Cow<[u8]>`.
// * `<str_fn>_bytes_internal`: the private worker shared by both, returning
//   `None` when nothing had to be replaced and `Some(escaped)` otherwise.
macro_rules! escape_fn {
    (
        $(#[$meta:meta])*
        $vis:vis fn $name:ident;

        $(#[$bytes_meta:meta])*
        $bytes_vis:vis fn $bytes_name:ident;

        {
            $($ch:literal => $entity:literal,)+
        }
    ) => {
        paste! {
            $(#[$meta])*
            $vis fn $name<'a, S: Into<Cow<'a, str>>>(input: S) -> Cow<'a, str> {
                let input = input.into();
                match [<$name _bytes_internal>](input.as_bytes()) {
                    // Invariant: the input was valid UTF-8, so the output is
                    // too as long as the table replaces ASCII bytes with
                    // valid UTF-8 sequences.
                    Some(output) => String::from_utf8(output)
                        .expect("escaping valid UTF-8 with a UTF-8 entity table must yield valid UTF-8")
                        .into(),
                    // Nothing to escape: hand the caller's value straight back.
                    None => input,
                }
            }

            $(#[$bytes_meta])*
            $bytes_vis fn $bytes_name<'a, S: Into<Cow<'a, [u8]>>>(input: S) -> Cow<'a, [u8]> {
                let input = input.into();
                match [<$name _bytes_internal>](&*input) {
                    Some(output) => output.into(),
                    // Nothing to escape: hand the caller's value straight back.
                    None => input,
                }
            }

            #[inline(always)]
            fn [<$name _bytes_internal>](raw: &[u8]) -> Option<Vec<u8>> {
                // Index of the first byte in `haystack` that appears in the table.
                #[inline]
                fn find_u8(haystack: &[u8]) -> Option<usize> {
                    find_u8_body!(haystack, $($ch),+)
                }

                // Replacement for a table byte; empty for any other byte.
                #[inline]
                const fn map_u8(c: u8) -> &'static [u8] {
                    match c {
                        $( $ch => $entity, )+
                        _ => b"",
                    }
                }

                // Bail out before allocating when there is nothing to escape.
                let first = find_u8(raw)?;

                let mut output: Vec<u8> = Vec::with_capacity(raw.len().saturating_mul(2));
                let mut remainder = raw;
                let mut next = Some(first);

                while let Some(hit) = next {
                    // `hit` indexes into `remainder`, so `matched` is non-empty
                    // and starts with the byte that has to be replaced.
                    let (clean, matched) = remainder.split_at(hit);
                    output.extend_from_slice(clean);
                    output.extend_from_slice(map_u8(matched[0]));
                    remainder = &matched[1..];
                    next = find_u8(remainder);
                }

                // Whatever follows the last replaced byte is copied verbatim.
                output.extend_from_slice(remainder);
                Some(output)
            }
        }
    }
}
escape_fn! {
    /// Escapes `&`, `<` and `>` in a string.
    ///
    /// The three characters are replaced with `&amp;`, `&lt;` and `&gt;`.
    /// Quote characters are left alone.
    ///
    /// Accepts anything convertible into `Cow<str>`. When the input contains
    /// none of the escaped characters it is returned as-is.
    pub fn escape_text;

    /// Escapes `&`, `<` and `>` in a byte slice.
    ///
    /// Applies the same replacements as [`escape_text`] but works on
    /// `Cow<[u8]>`, so the input does not have to be valid UTF-8. When the
    /// input contains none of the escaped bytes it is returned as-is.
    pub fn escape_text_bytes;

    {
        b'&' => b"&amp;",
        b'<' => b"&lt;",
        b'>' => b"&gt;",
    }
}
escape_fn! {
pub fn escape_attribute;
pub fn escape_attribute_bytes;
{
b'&' => b"&",
b'<' => b"<",
b'>' => b">",
b'"' => b""", }
}
escape_fn! {
pub fn escape_all_quotes;
pub fn escape_all_quotes_bytes;
{
b'&' => b"&",
b'<' => b"<",
b'>' => b">",
b'"' => b""", b'\'' => b"'", }
}
// Unit tests for the generated escape functions, covering both the `str` and
// the byte-slice flavour of each one.
#[cfg(test)]
mod tests {
    use super::*;
    use assert2::assert;
    use pastey::paste;

    // Declares one `#[test]` function named `$name` whose body is a single
    // `assert!` over the remaining tokens.
    macro_rules! test {
        ($name:ident, $($test:tt)+) => {
            #[test]
            fn $name() {
                // `test_all!` calls `.as_bytes()` on string literals.
                #![allow(clippy::string_lit_as_bytes)]
                assert!($($test)+);
            }
        };
    }

    // Runs one input/expected pair through all six public functions. Only
    // usable when every escaping level gives the same result, i.e. the input
    // contains no quote characters.
    macro_rules! test_all {
        ($name:ident, $in:expr, $out:expr) => {
            paste! {
                test!([<escape_text_ $name>], escape_text($in) == $out);
                test!(
                    [<escape_attribute_ $name>],
                    escape_attribute($in) == $out
                );
                test!(
                    [<escape_all_quotes_ $name>],
                    escape_all_quotes($in) == $out
                );
                test!(
                    [<escape_text_bytes_ $name>],
                    escape_text_bytes($in.as_bytes()) == $out.as_bytes()
                );
                test!(
                    [<escape_attribute_bytes_ $name>],
                    escape_attribute_bytes($in.as_bytes()) == $out.as_bytes()
                );
                test!(
                    [<escape_all_quotes_bytes_ $name>],
                    escape_all_quotes_bytes($in.as_bytes()) == $out.as_bytes()
                );
            }
        };
    }

    test_all!(none, "", "");
    test_all!(clean, "clean", "clean");
    test_all!(lt_gt, "< >", "&lt; &gt;");
    test_all!(amp, "&", "&amp;");
    test_all!(prefix_amp, "prefix&", "prefix&amp;");
    test_all!(emoji_amp, "☺️&☺️", "☺️&amp;☺️");
    test_all!(
        special_clean,
        "Björk and Борис OBrien ❤️, “love beats hate”",
        "Björk and Борис OBrien ❤️, “love beats hate”"
    );

    // Quote handling is where the three escaping levels differ.
    test!(
        escape_text_quotes,
        escape_text("He said, \"That's mine.\"") == "He said, \"That's mine.\""
    );
    test!(
        escape_attribute_quotes,
        escape_attribute("He said, \"That's mine.\"")
            == "He said, &quot;That's mine.&quot;"
    );
    test!(
        escape_all_quotes_quotes,
        escape_all_quotes("He said, \"That's mine.\"")
            == "He said, &quot;That&apos;s mine.&quot;"
    );
    test!(
        escape_all_quotes_bytes_quotes,
        &*escape_all_quotes_bytes(&b"He said, \"That's mine.\""[..])
            == b"He said, &quot;That&apos;s mine.&quot;"
    );
    test!(
        escape_text_bytes_quotes,
        &*escape_text_bytes(&b"He said, \"That's mine.\""[..])
            == b"He said, \"That's mine.\""
    );
    test!(
        escape_attribute_bytes_quotes,
        &*escape_attribute_bytes(&b"He said, \"That's mine.\""[..])
            == b"He said, &quot;That's mine.&quot;"
    );

    // Corpus fixtures live outside this file. Judging by the assertions
    // below, `html-escaped.txt` is the expected `escape_text` output for
    // `html-raw.txt`, and `html-cleaned.txt` needs no escaping.
    const HTML_DIRTY: &str = include_str!("../tests/corpus/html-raw.txt");
    const HTML_DIRTY_ESCAPED: &str =
        include_str!("../tests/corpus/html-escaped.txt");
    const HTML_CLEAN: &str = include_str!("../tests/corpus/html-cleaned.txt");

    test!(
        escape_text_dirty_html,
        escape_text(HTML_DIRTY) == HTML_DIRTY_ESCAPED
    );
    test!(
        escape_text_clean_html,
        escape_text(HTML_CLEAN) == HTML_CLEAN
    );
    test!(
        escape_text_bytes_dirty_html,
        escape_text_bytes(HTML_DIRTY.as_bytes())
            == HTML_DIRTY_ESCAPED.as_bytes()
    );
    test!(
        escape_text_bytes_clean_html,
        escape_text_bytes(HTML_CLEAN.as_bytes()) == HTML_CLEAN.as_bytes()
    );

    // The byte-slice functions must pass invalid UTF-8 through unchanged.
    test!(
        escape_text_bytes_invalid_utf8,
        escape_text_bytes(&b"\xa1"[..]) == &b"\xa1"[..]
    );
    test!(
        escape_attribute_bytes_invalid_utf8,
        escape_attribute_bytes(&b"\xa1"[..]) == &b"\xa1"[..]
    );
    test!(
        escape_all_quotes_bytes_invalid_utf8,
        escape_all_quotes_bytes(&b"\xa1"[..]) == &b"\xa1"[..]
    );
}