#![no_std]
#![deny(missing_docs)]
#![deny(clippy::undocumented_unsafe_blocks)]
#![cfg_attr(all(feature = "simd", nightly), feature(portable_simd))]
#[cfg(any(test, feature = "std"))]
extern crate std;
#[cfg(feature = "alloc")]
extern crate alloc;
#[cfg(any(test, feature = "alloc"))]
use alloc::{borrow::Cow, string::String, vec::Vec};
use token::EscapeTokens;
use core::{
char,
fmt::{self, Write as _},
iter::FusedIterator,
str,
};
pub mod explicit;
pub mod stream;
pub mod token;
/// Returns a lazy iterator over the escaped form of `input`.
///
/// Nothing is processed until the returned [`Escape`] is iterated, formatted
/// with `Display`, or compared. Each item is a `&str` chunk; concatenating the
/// chunks gives the escaped text. The tests in this file pin the JSON-style
/// scheme: `"` and `\` gain a leading backslash, newline / carriage return /
/// tab / backspace / form feed become `\n` `\r` `\t` `\b` `\f`, other control
/// characters become `\u00XX`, and `/` and non-ASCII text pass through.
#[inline]
pub fn escape_str(input: &str) -> Escape<'_> {
    let inner = EscapeTokens::new(input);
    Escape { inner }
}
/// Lazy iterator returned by [`escape_str`].
///
/// Yields the escaped text as a sequence of `&str` chunks. It is `Clone`, and
/// the comparison impls in this file clone it so that comparing never
/// consumes the original.
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Escape<'a> {
// Token-level escaper from the `token` module; every trait impl on `Escape`
// delegates to it.
inner: EscapeTokens<'a>,
}
/// Yields the escaped output chunk by chunk.
impl<'a> Iterator for Escape<'a> {
    type Item = &'a str;

    /// Pulls the next token from the inner escaper and exposes it as a `&str`.
    #[inline(always)]
    fn next(&mut self) -> Option<&'a str> {
        let token = self.inner.next()?;
        Some(token.as_str())
    }

    /// Forwards the inner escaper's estimate unchanged.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, upper) = self.inner.size_hint();
        (lower, upper)
    }
}
// Marker impl: promises that `next` keeps returning `None` once it has done so.
// NOTE(review): this relies on `EscapeTokens` behaving that way — its code is
// not visible in this file.
impl FusedIterator for Escape<'_> {}
impl fmt::Display for Escape<'_> {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.inner, f)
}
}
/// Opaque debug output: prints the type name only and hides internal state.
impl fmt::Debug for Escape<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("Escape");
        builder.finish_non_exhaustive()
    }
}
/// Compares the escaped output against any byte-like value without allocating.
impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Escape<'_> {
    /// Returns `true` when the chunks of (a clone of) `self`, concatenated,
    /// are exactly the bytes of `other`.
    fn eq(&self, other: &B) -> bool {
        // Bytes of `other` that have not been matched yet.
        let mut rest: &[u8] = other.as_ref();
        for piece in self.clone() {
            match rest.strip_prefix(piece.as_bytes()) {
                Some(tail) => rest = tail,
                None => return false,
            }
        }
        // Anything left over means `other` is longer than the escaped text.
        rest.is_empty()
    }
}
/// Compares two escapers by the text they would produce.
impl<'a, 'b> PartialEq<Escape<'a>> for Escape<'b> {
    fn eq(&self, other: &Escape<'a>) -> bool {
        // Cheap check first: identical remaining input.
        if self.inner.bytes == other.inner.bytes {
            return true;
        }
        // Otherwise compare the produced chunks; chunk boundaries may differ.
        chunks_eq(self.clone(), other.clone())
    }
}
/// Converts the escaper into a `Cow<str>` by handing its inner token iterator
/// to that type's own conversion.
#[cfg(feature = "alloc")]
impl<'a> From<Escape<'a>> for Cow<'a, str> {
    fn from(iter: Escape<'a>) -> Self {
        let Escape { inner } = iter;
        inner.into()
    }
}
/// Returns a lazy decoder over the escaped bytes in `input`.
///
/// Accepts anything viewable as bytes (`&str`, `&[u8]`, byte-string literals).
/// The returned [`Unescape`] yields `Result<&[u8], UnescapeError>` chunks and
/// does no work until it is consumed.
#[inline]
pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
    let bytes: &[u8] = input.as_ref();
    Unescape::new(bytes)
}
/// Like [`unescape`], but first strips one pair of surrounding double quotes.
///
/// The quotes are removed only when the input is at least two bytes long and
/// both its first and last byte are `"`; any other input is decoded as-is.
#[inline]
pub fn unescape_quoted<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
    match input.as_ref() {
        // The two quote patterns cannot match the same byte, so a lone `"`
        // falls through to the second arm.
        [b'"', inner @ .., b'"'] => unescape(inner),
        bytes => unescape(bytes),
    }
}
/// Lazy decoder returned by [`unescape`] and [`unescape_quoted`].
///
/// As an iterator it yields `Result<&[u8], UnescapeError>`: literal runs of
/// the input, and the UTF-8 bytes of each decoded escape one byte at a time.
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Unescape<'a> {
    /// Chunk-level decoder from the `explicit` module.
    inner: explicit::Unescape<'a>,
    /// UTF-8 encoding of the most recently decoded escape character.
    unicode: [u8; 4],
    /// Number of meaningful bytes in `unicode`.
    unicode_len: u8,
    /// Number of bytes of `unicode` that have already been handed out.
    unicode_pos: u8,
}
impl<'a> Unescape<'a> {
    /// Wraps `input` in a decoder with no pending escape bytes.
    fn new(input: &'a [u8]) -> Self {
        let inner = explicit::Unescape { bytes: input };
        Self {
            inner,
            unicode: [0; 4],
            unicode_len: 0,
            unicode_pos: 0,
        }
    }

    /// Stores the UTF-8 encoding of `ch` as the pending escape output and
    /// rewinds the read cursor to its first byte.
    #[inline]
    fn store_unicode(&mut self, ch: char) {
        let encoded = ch.encode_utf8(&mut self.unicode);
        // A `char` encodes to at most four bytes, so this cast cannot truncate.
        self.unicode_len = encoded.len() as u8;
        self.unicode_pos = 0;
    }

    /// Hands out the next pending escape byte, or `None` when all of them
    /// have been emitted.
    #[inline]
    fn emit_pending_byte(&mut self) -> Option<u8> {
        let pending = &self.unicode[..usize::from(self.unicode_len)];
        let byte = pending.get(usize::from(self.unicode_pos)).copied()?;
        self.unicode_pos += 1;
        Some(byte)
    }

    /// Writes the whole pending escape character to `f` in one go and marks
    /// every one of its bytes as consumed.
    #[inline]
    fn emit_unicode_as_str(&mut self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let encoded = &self.unicode[..usize::from(self.unicode_len)];
        // SAFETY: in this file `unicode` and `unicode_len` are only written
        // together by `char::encode_utf8` (in `store_unicode` and in the
        // `io::Read` impl) or reset to a length of zero, so the first
        // `unicode_len` bytes are always one complete UTF-8 sequence or empty.
        let text = unsafe { str::from_utf8_unchecked(encoded) };
        f.write_str(text)?;
        self.unicode_pos = self.unicode_len;
        Ok(())
    }

    /// Shared driver behind the two `Display` wrappers.
    ///
    /// Streams the decoded output into `f`. An unescape error ends the output:
    /// with `lossy` it ends silently with `Ok`, otherwise it yields
    /// `fmt::Error`. Invalid UTF-8 in literal chunks is handled by
    /// `display_bytes_utf8` according to the same flag.
    fn _display_utf8(mut self, f: &mut fmt::Formatter<'_>, lossy: bool) -> fmt::Result {
        loop {
            let chunk = match self.next() {
                None => return Ok(()),
                Some(Ok(chunk)) => chunk,
                Some(Err(_)) if lossy => return Ok(()),
                Some(Err(_)) => return Err(fmt::Error),
            };
            if chunk.is_empty() {
                continue;
            }
            // `next` has just handed out the first byte of a multi-byte
            // escape. A lone lead byte is not valid UTF-8, so write the whole
            // character at once; that also skips its remaining bytes.
            let at_start_of_multibyte_escape = self.unicode_pos == 1 && self.unicode_len > 1;
            if at_start_of_multibyte_escape {
                self.emit_unicode_as_str(f)?;
            } else {
                display_bytes_utf8(chunk, f, lossy)?;
            }
        }
    }

    /// Decodes everything and validates the result as UTF-8.
    ///
    /// # Errors
    ///
    /// `DecodeUtf8Error::Unescape` if an escape sequence is invalid, or
    /// `DecodeUtf8Error::Utf8` if the decoded bytes are not valid UTF-8.
    #[cfg(feature = "alloc")]
    pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
        let bytes: Cow<'a, [u8]> = self.try_into().map_err(DecodeUtf8Error::Unescape)?;
        let text = match bytes {
            Cow::Borrowed(slice) => {
                Cow::Borrowed(str::from_utf8(slice).map_err(DecodeUtf8Error::Utf8)?)
            }
            Cow::Owned(buffer) => Cow::Owned(
                String::from_utf8(buffer).map_err(|e| DecodeUtf8Error::Utf8(e.utf8_error()))?,
            ),
        };
        Ok(text)
    }

    /// Decodes everything, replacing invalid UTF-8 with U+FFFD.
    ///
    /// # Errors
    ///
    /// Returns the `UnescapeError` if an escape sequence is invalid.
    #[cfg(feature = "alloc")]
    pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
        let bytes: Cow<'a, [u8]> = self.try_into()?;
        Ok(decode_utf8_lossy(bytes))
    }

    /// Wraps the decoder in a strict `Display` adapter: formatting fails with
    /// `fmt::Error` on an invalid escape or on invalid UTF-8.
    pub fn display_utf8(self) -> DisplayUnescape<'a> {
        DisplayUnescape { inner: self }
    }

    /// Wraps the decoder in a lossy `Display` adapter: invalid UTF-8 becomes
    /// U+FFFD and output stops, without an error, at the first invalid escape.
    pub fn display_utf8_lossy(self) -> DisplayUnescapeLossy<'a> {
        DisplayUnescapeLossy { inner: self }
    }
}
/// Yields the decoded output as byte chunks.
impl<'a> Iterator for Unescape<'a> {
    type Item = Result<&'a [u8], UnescapeError>;

    /// Returns the next piece of output.
    ///
    /// Pending escape bytes are drained first, one byte per call, each served
    /// from a static lookup table. Otherwise the next chunk is pulled from the
    /// inner decoder: its literal part is returned now (it may be empty) and
    /// its decoded escape character, if any, is queued for the following calls.
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(byte) = self.emit_pending_byte() {
            return Some(Ok(byte_as_static_slice(byte)));
        }
        let chunk = match self.inner.next()? {
            Ok(chunk) => chunk,
            Err(err) => return Some(Err(err)),
        };
        if let Some(ch) = chunk.unescaped {
            self.store_unicode(ch);
        }
        Some(Ok(chunk.literal))
    }
}
// Marker impl: promises that `next` keeps returning `None` once it has done so.
// NOTE(review): this relies on `explicit::Unescape` behaving that way — its
// code is not visible in this file.
impl FusedIterator for Unescape<'_> {}
/// Streams the decoded bytes through the standard `Read` interface.
#[cfg(feature = "std")]
impl std::io::Read for Unescape<'_> {
    /// Fills `buf` with as many decoded bytes as fit and returns the count.
    ///
    /// A decoded escape character that does not fit completely is carried over
    /// to the next call, so multi-byte characters may be split across reads.
    ///
    /// # Errors
    ///
    /// An invalid escape is reported as `ErrorKind::InvalidData` wrapping the
    /// `UnescapeError`. `Read::read` must not return an error after bytes have
    /// been read, so if this call has already written something it returns
    /// that count and the same error is reported by the next call.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let mut total_written = 0;
        let mut remaining_buf = buf;
        loop {
            // Step 1: flush whatever is left of the last decoded escape.
            if self.unicode_pos < self.unicode_len {
                let pending_unicode =
                    &self.unicode[self.unicode_pos as usize..self.unicode_len as usize];
                let bytes_to_copy = pending_unicode.len().min(remaining_buf.len());
                remaining_buf[..bytes_to_copy].copy_from_slice(&pending_unicode[..bytes_to_copy]);
                // At most four bytes are ever pending, so the cast is lossless.
                self.unicode_pos += bytes_to_copy as u8;
                total_written += bytes_to_copy;
                remaining_buf = &mut remaining_buf[bytes_to_copy..];
                if remaining_buf.is_empty() {
                    break;
                }
            }
            // The pending buffer is drained; reset it so a new escape can be
            // stored starting at position zero.
            if self.unicode_pos >= self.unicode_len {
                self.unicode_pos = 0;
                self.unicode_len = 0;
            }
            // `bytes` is the inner decoder's only field, so this copy is a
            // complete checkpoint of the parser state.
            let checkpoint = self.inner.bytes;
            // Step 2: pull the next chunk from the parser.
            match self.inner.next() {
                Some(Ok(chunk)) => {
                    let bytes_to_copy = chunk.literal.len().min(remaining_buf.len());
                    if bytes_to_copy > 0 {
                        remaining_buf[..bytes_to_copy]
                            .copy_from_slice(&chunk.literal[..bytes_to_copy]);
                        total_written += bytes_to_copy;
                        remaining_buf = &mut remaining_buf[bytes_to_copy..];
                    }
                    if bytes_to_copy < chunk.literal.len() {
                        // `buf` filled up in the middle of the literal. Rewind
                        // the parser to the first byte that was not copied;
                        // the escape after the literal is re-parsed later.
                        // The literal is a (non-empty) piece of the input the
                        // parser just walked over, so its address gives its
                        // offset inside `checkpoint`. If that assumption ever
                        // broke, the slice below would panic instead of
                        // reading out of bounds.
                        let literal_offset =
                            chunk.literal.as_ptr() as usize - checkpoint.as_ptr() as usize;
                        self.inner.bytes = &checkpoint[literal_offset + bytes_to_copy..];
                        break;
                    }
                    if let Some(ch) = chunk.unescaped {
                        // Queue the decoded character; the next loop iteration
                        // copies as much of it as fits.
                        let encoded = ch.encode_utf8(&mut self.unicode);
                        self.unicode_len = encoded.len() as u8;
                        continue;
                    }
                }
                Some(Err(e)) => {
                    if total_written > 0 {
                        // Bytes were already delivered in this call: hand them
                        // over now and put the parser back in front of the bad
                        // escape so the next call reports the error.
                        self.inner.bytes = checkpoint;
                        break;
                    }
                    return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e));
                }
                None => break,
            }
        }
        Ok(total_written)
    }

    /// Appends all remaining decoded bytes to `buf` and returns how many were
    /// added.
    ///
    /// Iterates chunks directly instead of going through `read`. On an invalid
    /// escape the bytes decoded so far stay in `buf` and an
    /// `ErrorKind::InvalidData` error is returned.
    fn read_to_end(&mut self, buf: &mut std::vec::Vec<u8>) -> std::io::Result<usize> {
        let start_len = buf.len();
        for result in self {
            match result {
                Ok(chunk) => buf.extend_from_slice(chunk),
                Err(err) => return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err)),
            }
        }
        Ok(buf.len() - start_len)
    }
}
/// Opaque debug output: prints the type name only and hides internal state.
impl fmt::Debug for Unescape<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("Unescape");
        builder.finish_non_exhaustive()
    }
}
/// Compares the decoded output against any byte-like value without allocating.
impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Unescape<'_> {
    /// Returns `true` when (a clone of) `self` decodes without error to
    /// exactly the bytes of `other`. Any unescape error makes the result
    /// `false`.
    fn eq(&self, other: &B) -> bool {
        // Bytes of `other` that have not been matched yet.
        let mut rest: &[u8] = other.as_ref();
        for item in self.clone() {
            let Ok(piece) = item else {
                return false;
            };
            let Some(tail) = rest.strip_prefix(piece) else {
                return false;
            };
            rest = tail;
        }
        // Anything left over means `other` is longer than the decoded output.
        rest.is_empty()
    }
}
/// Lets an expected outcome (`Ok(bytes)` or `Err(error)`) be compared directly
/// with a decoder.
impl<B: AsRef<[u8]>> PartialEq<Unescape<'_>> for Result<B, UnescapeError> {
    /// `Ok(bytes)` matches a decoder that produces exactly those bytes.
    /// `Err(e)` matches a decoder whose first error equals `e`, regardless of
    /// what it produced before that error.
    fn eq(&self, unescape: &Unescape<'_>) -> bool {
        match self {
            Ok(expected_bytes) => unescape == expected_bytes,
            Err(expected_error) => unescape
                .clone()
                .find_map(Result::err)
                .is_some_and(|actual_error| actual_error == *expected_error),
        }
    }
}
// Compares two decoders by what they would produce rather than by their raw
// input, so differently written but equivalent inputs can compare equal.
impl<'a, 'b> PartialEq<Unescape<'a>> for Unescape<'b> {
fn eq(&self, other: &Unescape<'a>) -> bool {
// Fast path: same remaining input and same pending-escape state.
((self.inner.bytes == other.inner.bytes)
&& (self.unicode == other.unicode)
&& (self.unicode_len == other.unicode_len)
&& (self.unicode_pos == other.unicode_pos))
|| {
// Slow path: run clones of both decoders. Each `map_while` adapter yields
// chunks until an error shows up and records that error on the side.
let mut a_error = None;
let mut b_error = None;
let mut a = self.clone().map_while(|result| match result {
Ok(ok) => Some(ok),
Err(err) => {
a_error = Some(err);
None
}
});
let mut b = other.clone().map_while(|result| match result {
Ok(ok) => Some(ok),
Err(err) => {
b_error = Some(err);
None
}
});
let streams_match = chunks_eq(&mut a, &mut b);
// `chunks_eq` can return before either side is exhausted, so drain both to
// give each adapter the chance to record an error. Consuming the adapters
// also ends their borrows of `a_error` / `b_error`.
a.for_each(|_| {});
b.for_each(|_| {});
match (a_error, b_error) {
// Both sides failed: equal when the errors are equal, whatever preceded them.
(Some(a_err), Some(b_err)) => a_err == b_err,
// Neither side failed: equal when the produced bytes matched.
(None, None) => streams_match,
// Only one side failed.
_ => false,
}
}
}
}
/// Collects the whole decoded output, borrowing when possible.
#[cfg(feature = "alloc")]
impl<'a> TryFrom<Unescape<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Returns a borrowed slice when the decoder yields at most one chunk and
    /// an owned buffer otherwise.
    ///
    /// # Errors
    ///
    /// Returns the first `UnescapeError` the decoder reports.
    fn try_from(mut value: Unescape<'a>) -> Result<Self, Self::Error> {
        let Some(first) = value.next().transpose()? else {
            return Ok(Cow::Borrowed(b""));
        };
        let Some(second) = value.next().transpose()? else {
            return Ok(Cow::Borrowed(first));
        };
        // The capacity is the two chunks already in hand plus all input that
        // is still unparsed.
        let capacity = first.len() + second.len() + value.inner.bytes.len();
        let mut buf = Vec::with_capacity(capacity);
        buf.extend_from_slice(first);
        buf.extend_from_slice(second);
        for item in value {
            buf.extend_from_slice(item?);
        }
        Ok(Cow::Owned(buf))
    }
}
/// Strict `Display` adapter returned by `Unescape::display_utf8`.
///
/// Formatting fails with `fmt::Error` on an invalid escape or on invalid UTF-8.
pub struct DisplayUnescape<'a> {
// The decoder to render; it is cloned on every `fmt` call.
inner: Unescape<'a>,
}
/// Renders the decoded text in strict mode.
impl fmt::Display for DisplayUnescape<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Work on a clone so the adapter can be formatted more than once.
        let decoder = self.inner.clone();
        decoder._display_utf8(f, false)
    }
}
/// Lossy `Display` adapter returned by `Unescape::display_utf8_lossy`.
///
/// Invalid UTF-8 is rendered as U+FFFD, and output stops without an error at
/// the first invalid escape.
pub struct DisplayUnescapeLossy<'a> {
// The decoder to render; it is cloned on every `fmt` call.
inner: Unescape<'a>,
}
/// Renders the decoded text in lossy mode.
impl fmt::Display for DisplayUnescapeLossy<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Work on a clone so the adapter can be formatted more than once.
        let decoder = self.inner.clone();
        decoder._display_utf8(f, true)
    }
}
/// Error returned by `Unescape::decode_utf8`.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
pub enum DecodeUtf8Error {
/// The decoded bytes are not valid UTF-8.
Utf8(str::Utf8Error),
/// The input contains an invalid escape sequence.
Unescape(UnescapeError),
}
/// Prints the message of whichever underlying error is wrapped.
impl fmt::Display for DecodeUtf8Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let cause: &dyn fmt::Display = match self {
            DecodeUtf8Error::Utf8(e) => e,
            DecodeUtf8Error::Unescape(e) => e,
        };
        fmt::Display::fmt(cause, f)
    }
}
/// Details for `UnescapeErrorKind::InvalidEscape`: a backslash was followed by
/// a byte that does not start a recognised escape.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[non_exhaustive]
pub struct InvalidEscapeError {
/// The byte found after the backslash (the tests in this file expect `b'x'`
/// for the input `\x`).
pub found: u8,
}
/// Details for `UnescapeErrorKind::LoneSurrogate`: a `\u` escape named a
/// surrogate that is not part of a valid pair.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[non_exhaustive]
pub struct LoneSurrogateError {
/// The surrogate value that was found, printed as `0x{:04X}` in the error
/// message.
pub surrogate: u16,
}
/// Details for `UnescapeErrorKind::InvalidHex`: a byte that is not a hex digit
/// appeared where one was required.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[non_exhaustive]
pub struct InvalidHexError {
    /// The offending byte.
    pub found: u8,
}

/// Prints the offending byte as a two-digit upper-case hex value.
impl fmt::Display for InvalidHexError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { found } = *self;
        f.write_fmt(format_args!("found invalid hex digit '0x{:02X}'", found))
    }
}
/// Error produced when an escape sequence cannot be decoded.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
pub struct UnescapeError {
// What went wrong.
pub(crate) kind: UnescapeErrorKind,
// Byte offset of the problem. The tests in this file expect 1 for `\x` and 4
// for `Trunc: \u12`, which suggests it is counted from the backslash that
// starts the escape — confirm against the `explicit` module.
pub(crate) offset: u8,
}
impl UnescapeError {
    /// Returns the category of the failure.
    pub fn kind(&self) -> UnescapeErrorKind {
        let Self { kind, .. } = *self;
        kind
    }

    /// Returns the byte offset recorded with the failure.
    pub fn offset(&self) -> u8 {
        let Self { offset, .. } = *self;
        offset
    }
}
/// The category of an [`UnescapeError`].
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[non_exhaustive]
pub enum UnescapeErrorKind {
/// A backslash was followed by a byte that does not start a valid escape.
InvalidEscape(InvalidEscapeError),
/// A byte that is not a hex digit was found where one was required.
InvalidHex(InvalidHexError),
/// The input ended in the middle of an escape sequence.
UnexpectedEof,
/// A surrogate was found that is not part of a valid pair.
LoneSurrogate(LoneSurrogateError),
}
/// Prints a human-readable message that always ends with the error offset.
impl fmt::Display for UnescapeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { kind, offset } = *self;
        match kind {
            UnescapeErrorKind::InvalidEscape(e) => write!(
                f,
                "invalid escape: '\\0x{:02X}' at offset {}",
                e.found, offset
            ),
            UnescapeErrorKind::InvalidHex(ref s) => write!(f, "{} at offset {}", s, offset),
            UnescapeErrorKind::UnexpectedEof => write!(
                f,
                "unexpected end of input while parsing escape sequence, expected character at offset {}",
                offset
            ),
            UnescapeErrorKind::LoneSurrogate(e) => write!(
                f,
                "invalid unicode sequence: lone surrogate found: 0x{:04X} at offset {}",
                e.surrogate, offset
            ),
        }
    }
}
// `UnescapeError` wraps no other error, so the default `source` (`None`) is kept.
impl core::error::Error for UnescapeError {}
/// Exposes the wrapped error as the `source` of a [`DecodeUtf8Error`].
impl core::error::Error for DecodeUtf8Error {
    // The return type must name `core::error::Error`: this crate is
    // `#![no_std]` and only links `std` for tests or with the `std` feature,
    // so a `std::` path here would not resolve in other builds.
    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
        match self {
            DecodeUtf8Error::Utf8(e) => Some(e),
            DecodeUtf8Error::Unescape(e) => Some(e),
        }
    }
}
// Lookup table with one single-byte array per possible `u8` value; entry `i`
// is `[i]`. It lets a decoded byte be returned as a borrowed `&'static [u8]`
// instead of an owned value.
const U8_TABLE: [[u8; 1]; 256] = {
    let mut table = [[0u8; 1]; 256];
    let mut byte = 0u8;
    // `for` loops are not allowed in const evaluation, hence the manual loop.
    loop {
        table[byte as usize][0] = byte;
        if byte == u8::MAX {
            break;
        }
        byte += 1;
    }
    table
};
/// Returns a one-byte `'static` slice whose only element is `b`, borrowed from
/// the `U8_TABLE` lookup table.
#[inline(always)]
fn byte_as_static_slice(b: u8) -> &'static [u8] {
    let table: &'static [[u8; 1]; 256] = &U8_TABLE;
    &table[usize::from(b)]
}
#[cfg(feature = "alloc")]
/// Converts bytes to text, replacing invalid UTF-8 sequences with U+FFFD.
///
/// Borrowed input stays borrowed when it is already valid UTF-8. Owned input
/// that is valid UTF-8 is turned into a `String` without copying; otherwise a
/// repaired copy is built.
fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid: the buffer is reused as the string's storage.
            Ok(text) => Cow::Owned(text),
            // Invalid: the error still holds the original bytes, so build the
            // lossy copy from them.
            Err(err) => Cow::Owned(String::from_utf8_lossy(err.as_bytes()).into_owned()),
        },
    }
}
/// Returns `true` when two chunked byte streams hold the same bytes overall.
///
/// Only the concatenated content matters: chunk boundaries may differ between
/// `a` and `b`, and empty chunks anywhere — including at the very end of
/// either stream — are ignored. Nothing is allocated.
fn chunks_eq<'a, I1, A, I2, B>(mut a: I1, mut b: I2) -> bool
where
    A: 'a + AsRef<[u8]> + ?Sized,
    B: 'a + AsRef<[u8]> + ?Sized,
    I1: Iterator<Item = &'a A>,
    I2: Iterator<Item = &'a B>,
{
    // Unmatched tail of the chunk currently held for each side.
    let mut a_rem: &[u8] = &[];
    let mut b_rem: &[u8] = &[];
    loop {
        // Refill a side that has run dry, skipping chunks that carry no bytes.
        // A side that is still empty afterwards is exhausted; it is never
        // polled again because every such case returns below.
        while a_rem.is_empty() {
            match a.next() {
                Some(chunk) => a_rem = chunk.as_ref(),
                None => break,
            }
        }
        while b_rem.is_empty() {
            match b.next() {
                Some(chunk) => b_rem = chunk.as_ref(),
                None => break,
            }
        }
        match (a_rem.is_empty(), b_rem.is_empty()) {
            // Both streams ended at the same point.
            (true, true) => return true,
            // One stream has bytes left that the other cannot match.
            (true, false) | (false, true) => return false,
            (false, false) => {}
        }
        // Compare the overlapping prefix and keep whatever is left over.
        let n = a_rem.len().min(b_rem.len());
        if a_rem[..n] != b_rem[..n] {
            return false;
        }
        a_rem = &a_rem[n..];
        b_rem = &b_rem[n..];
    }
}
/// Writes `bytes` to `f` as text.
///
/// Valid UTF-8 runs are written as-is. For each invalid sequence, `lossy`
/// decides the outcome: `true` writes U+FFFD and carries on, `false` stops
/// with `fmt::Error`. Valid text that precedes the bad bytes has already been
/// written by then.
#[inline]
fn display_bytes_utf8(bytes: &[u8], f: &mut fmt::Formatter<'_>, lossy: bool) -> fmt::Result {
    bytes.utf8_chunks().try_for_each(|piece| {
        f.write_str(piece.valid())?;
        match (piece.invalid().is_empty(), lossy) {
            (true, _) => Ok(()),
            (false, true) => f.write_char(char::REPLACEMENT_CHARACTER),
            (false, false) => Err(fmt::Error),
        }
    })
}
// Unit tests for escaping, unescaping, the `chunks_eq` helper, the `io::Read`
// impl and the `Display` adapters.
#[cfg(test)]
mod tests {
use core::fmt::Display;
use std::{io::Read as _, string::ToString as _, vec};
use super::*;
// Checks that `input` escapes to `want` through both the top-level and the
// `explicit` API, via collecting into a `String` and via `PartialEq`.
fn test_escape_typical(input: &str, want: &str) {
let got = escape_str(input).collect::<String>();
assert_eq!(got, want);
assert_eq!(escape_str(input), want);
let got = explicit::escape_str(input).collect::<String>();
assert_eq!(got, want);
assert_eq!(explicit::escape_str(input), want)
}
#[test]
fn test_empty_string() {
test_escape_typical("", "");
}
#[test]
fn test_quotes() {
test_escape_typical("\"hello\"", "\\\"hello\\\"")
}
#[test]
fn test_backslash() {
test_escape_typical("\\hello\\", "\\\\hello\\\\");
}
// A forward slash is expected to pass through unescaped.
#[test]
fn test_slash() {
test_escape_typical("/hello/", "/hello/");
}
#[test]
fn test_control_chars() {
test_escape_typical("\n\r\t\x08\x0C", "\\n\\r\\t\\b\\f");
}
#[test]
fn test_escape_fully() {
let input = "Hello, \"world\"!\nThis contains a \\ backslash and a \t tab.";
let expected = r#"Hello, \"world\"!\nThis contains a \\ backslash and a \t tab."#;
test_escape_typical(input, expected);
}
// Control characters without a short escape are expected as lower-case `\u00XX`.
#[test]
fn test_other_control_chars() {
let input = "Null:\0, Bell:\x07";
let expected = r#"Null:\u0000, Bell:\u0007"#;
test_escape_typical(input, expected);
test_escape_typical("\x00\x1F", "\\u0000\\u001f");
test_escape_typical("\x19", "\\u0019");
}
// The iterator is expected to split its output at each escaped character.
#[test]
fn test_iterator_chunks() {
let input = "prefix\npostfix";
let mut iter = escape_str(input);
assert_eq!(iter.next(), Some("prefix"));
assert_eq!(iter.next(), Some(r#"\n"#));
assert_eq!(iter.next(), Some("postfix"));
assert_eq!(iter.next(), None);
}
// Input without anything to escape, including non-ASCII text, is expected to
// come back as a single chunk.
#[test]
fn test_no_escape_needed() {
let input = "A simple string with no escapes.";
let mut iter = escape_str(input);
assert_eq!(iter.next(), Some("A simple string with no escapes."));
assert_eq!(iter.next(), None);
let input = "café";
let mut iter = escape_str(input);
assert_eq!(iter.next(), Some("café"));
assert_eq!(iter.next(), None);
let input = "❤️";
let mut iter = escape_str(input);
assert_eq!(iter.next(), Some("❤️"));
assert_eq!(iter.next(), None);
}
#[test]
fn test_byte_table() {
assert_eq!(byte_as_static_slice(0), &[0]);
assert_eq!(byte_as_static_slice(5), &[5]);
assert_eq!(byte_as_static_slice(255), &[255]);
}
// Checks that `input` unescapes to `want` through both the top-level and the
// `explicit` API, via `decode_utf8`, `PartialEq` and the strict `Display`
// adapter.
fn test_unescape_typical<I: AsRef<[u8]> + ?Sized>(input: &I, want: &str) {
let got = unescape(input).decode_utf8().unwrap();
assert_eq!(got, want);
assert_eq!(unescape(input), want);
assert_display(unescape(input).display_utf8(), Ok(want));
let got = explicit::unescape(input).decode_utf8().unwrap();
assert_eq!(got, want);
assert_eq!(explicit::unescape(input), want);
assert_display(explicit::unescape(input).display_utf8(), Ok(want));
}
#[test]
fn test_unicode_escape_basic_unescape() {
let s = "X\\u4E16Y";
test_unescape_typical(s, "X世Y");
let s = "Snow: \\u2603"; test_unescape_typical(s, "Snow: ☃");
let s = "A \\u03A9 B"; test_unescape_typical(s, "A Ω B");
}
// A high/low surrogate pair is expected to decode to one character.
#[test]
fn test_surrogate_pair_unescape() {
let s = "A\\uD83D\\uDE00B";
test_unescape_typical(s, "A😀B")
}
#[test]
fn test_invalid_escape_unescape() {
let s = b"\\x";
let mut u = unescape(s);
match u.next() {
Some(Err(UnescapeError {
kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError { found: b'x' }),
offset: 1,
})) => {}
_ => panic!("expected invalid escape"),
}
let mut u = explicit::unescape(s);
match u.next() {
Some(Err(UnescapeError {
kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError { found: b'x' }),
offset: 1,
})) => {}
_ => panic!("expected invalid escape"),
}
}
#[test]
fn test_simple_unescape() {
let input = "Hello\\nWorld\\\"!"; test_unescape_typical(input, "Hello\nWorld\"!")
}
// A `\u` escape cut short by the end of input is expected to report
// `UnexpectedEof` at offset 4.
#[test]
fn test_truncated_unicode() {
let input = "Trunc: \\u12"; let it = unescape(input);
let mut found = false;
for r in it {
match r {
Ok(_) => continue,
Err(UnescapeError {
kind: UnescapeErrorKind::UnexpectedEof,
offset: 4,
}) => {
found = true;
break;
}
Err(_) => break,
}
}
assert!(found);
assert_eq!(
explicit::unescape(input).next(),
Some(Err(UnescapeError {
kind: UnescapeErrorKind::UnexpectedEof,
offset: 4,
}))
);
}
// ---- `chunks_eq` ----
#[test]
fn test_empty_iterators_are_equal() {
let a: Vec<&[u8]> = vec![];
let b: Vec<&[u8]> = vec![];
assert!(chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_empty_vs_non_empty() {
let a: Vec<&[u8]> = vec![];
let b = vec![&[1, 2, 3]];
assert!(!chunks_eq(a.into_iter(), b.into_iter()));
let a = vec![&[1, 2, 3]];
let b: Vec<&[u8]> = vec![];
assert!(!chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_single_identical_chunks() {
let a = vec!["hello world"];
let b = vec!["hello world"];
assert!(chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_different_chunk_boundaries_str() {
let a = vec!["he", "llo", " ", "world"];
let b = vec!["hello ", "wo", "rld"];
assert!(chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_different_chunk_boundaries_bytes() {
let a = vec![&[1, 2], &[3, 4, 5][..]];
let b = vec![&[1, 2, 3], &[4, 5][..]];
assert!(chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_one_long_vs_many_short() {
let a = vec!["a-long-single-chunk"];
let b = vec!["a", "-", "long", "-", "single", "-", "chunk"];
assert!(chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_unequal_content_same_length() {
let a = vec!["hello"];
let b = vec!["hallo"];
assert!(!chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_unequal_at_chunk_boundary() {
let a = vec!["ab", "c"]; let b = vec!["ab", "d"]; assert!(!chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_one_is_prefix_of_other() {
let a = vec!["user", "name"]; let b = vec!["user", "name", "123"]; assert!(!chunks_eq(a.into_iter(), b.into_iter()));
let a = vec!["user", "name", "123"];
let b = vec!["user", "name"];
assert!(!chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_complex_remainer_logic() {
let a = vec![&[1, 2, 3], &[4, 5][..], &[6, 7, 8, 9], &[10]];
let b = vec![&[1, 2], &[3, 4, 5, 6][..], &[7, 8], &[9, 10]];
assert!(chunks_eq(a.into_iter(), b.into_iter()));
}
#[test]
fn test_with_vec_references() {
let v_a1 = vec![1, 2];
let v_a2 = vec![3, 4, 5];
let a_data = vec![&v_a1, &v_a2];
let v_b1 = vec![1, 2, 3];
let v_b2 = vec![4, 5];
let b_data = vec![&v_b1, &v_b2];
assert!(chunks_eq(a_data.into_iter(), b_data.into_iter()));
}
// After a whole input has been consumed, the iterator's remaining `bytes` is
// expected to be the empty slice located at the end of the original input
// (same address, not just same content).
#[test]
fn bytes_provenance() {
let input = b"hello";
let mut iter = explicit::unescape(input);
let chunk = iter.next().unwrap().unwrap();
assert_eq!(chunk.literal, b"hello");
assert!(core::ptr::eq(iter.bytes, &input[input.len()..]));
let input = "hello";
let mut iter = explicit::escape_str(input);
let chunk = iter.next().unwrap();
assert_eq!(chunk.literal(), "hello");
assert!(core::ptr::eq(
// SAFETY: `input` is ASCII; this assumes `iter.bytes` is a sub-slice of
// `input`'s bytes, which makes it valid UTF-8.
unsafe { str::from_utf8_unchecked(iter.bytes) },
&input[input.len()..]
));
let mut iter = escape_str(input);
let chunk = iter.next().unwrap();
assert_eq!(chunk, "hello");
assert!(core::ptr::eq(
// SAFETY: `input` is ASCII; this assumes `iter.inner.bytes` is a sub-slice
// of `input`'s bytes, which makes it valid UTF-8.
unsafe { str::from_utf8_unchecked(iter.inner.bytes) },
&input[input.len()..]
))
}
// ---- `std::io::Read` ----
#[test]
fn test_read_simple() {
let input = br#"hello world"#;
let mut reader = unescape(input);
let mut buf = [0u8; 20];
let bytes_read = reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 11);
assert_eq!(&buf[..bytes_read], b"hello world");
let bytes_read_eof = reader.read(&mut buf).unwrap();
assert_eq!(bytes_read_eof, 0);
}
#[test]
fn test_read_with_simple_escapes() {
let input = br#"hello\tworld\nline2"#;
let mut reader = unescape(input);
let mut buf = Vec::new();
reader.read_to_end(&mut buf).unwrap();
assert_eq!(buf, b"hello\tworld\nline2");
}
#[test]
fn test_read_into_small_buffer_multiple_calls() {
let input = br#"this is a long string with no escapes"#;
let mut reader = unescape(input);
let mut buf = [0u8; 10];
let mut result = Vec::new();
loop {
match reader.read(&mut buf) {
Ok(0) => break, Ok(n) => {
result.extend_from_slice(&buf[..n]);
}
Err(e) => panic!("Read error: {}", e),
}
}
assert_eq!(result, input);
}
// The four UTF-8 bytes of the decoded emoji are expected to be split between
// two reads when the buffer fills up after the first of them.
#[test]
fn test_read_multibyte_char_across_buffer_boundary() {
let input = br#"emoji: \uD83D\uDE00 is here"#;
let mut reader = unescape(input);
let mut buf = [0u8; 8];
let mut result = Vec::new();
let n1 = reader.read(&mut buf).unwrap();
assert_eq!(n1, 8);
assert_eq!(&buf[..n1], b"emoji: \xF0");
result.extend_from_slice(&buf[..n1]);
let n2 = reader.read(&mut buf).unwrap();
assert_eq!(n2, 8);
assert_eq!(&buf[..n2], b"\x9F\x98\x80 is h");
result.extend_from_slice(&buf[..n2]);
let n3 = reader.read(&mut buf).unwrap();
assert_eq!(n3, 3);
assert_eq!(&buf[..n3], b"ere");
result.extend_from_slice(&buf[..n3]);
let n4 = reader.read(&mut buf).unwrap();
assert_eq!(n4, 0);
assert_eq!(result, b"emoji: \xF0\x9F\x98\x80 is here");
assert_eq!(result, "emoji: 😀 is here".as_bytes());
}
#[test]
fn test_read_error_invalid_escape() {
let input = br#"hello \q world"#;
let mut reader = unescape(input);
let mut buf = [0u8; 20];
let result = reader.read(&mut buf);
assert!(result.is_err());
let err = result.unwrap_err();
assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
assert!(err.to_string().contains("invalid escape"));
}
#[test]
fn test_read_error_lone_surrogate() {
let input = br#"\uD83D rest of data seen"#; let mut reader = unescape(input);
let mut buf = [0u8; 10];
let err = reader.read(&mut buf).unwrap_err();
assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
assert!(err.to_string().contains("lone surrogate"));
}
#[test]
fn test_read_empty_input() {
let input = b"";
let mut reader = unescape(input);
let mut buf = [0u8; 10];
let bytes_read = reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0);
}
#[test]
fn test_read_into_empty_buffer() {
let input = b"hello";
let mut reader = unescape(input);
let mut buf = [0u8; 0];
let bytes_read = reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0);
}
#[test]
fn test_read_to_end_optimized() {
let input = br#"first\nsecond\tthird \uD83D\uDE00 last"#;
let mut reader = unescape(input);
let mut buf = Vec::new();
let bytes_read = reader.read_to_end(&mut buf).unwrap();
let expected = b"first\nsecond\tthird \xF0\x9F\x98\x80 last";
assert_eq!(bytes_read, expected.len());
assert_eq!(buf, expected);
}
// ---- `Display` adapters ----
// Formats `display` into a `String` with `fmt::write`, so a formatting error
// comes back as `Err` instead of a panic, then checks it against `want`:
// `Ok(text)` expects success with exactly that text, `Err(())` expects failure.
fn assert_display(display: impl Display, want: Result<&str, ()>) {
let mut w = String::new();
let res = fmt::write(&mut w, format_args!("{display}"));
match want {
Ok(want) => {
assert!(res.is_ok());
assert_eq!(w, want)
}
Err(_) => assert!(
res.is_err(),
"strict mode should return Err on invalid bytes"
),
}
}
#[test]
fn test_display_simple_string() {
let display = unescape("hello world").display_utf8();
assert_display(display, Ok("hello world"));
}
#[test]
fn test_display_empty_string() {
assert_display(unescape("").display_utf8(), Ok(""));
}
#[test]
fn test_display_standard_escapes() {
let input = br#"\" \\ \/ \b \f \n \r \t"#;
let expected = "\" \\ / \x08 \x0C \n \r \t";
assert_display(unescape(input).display_utf8(), Ok(expected));
}
#[test]
fn test_display_non_escaped_utf8() {
let input = "你好, world".as_bytes();
let expected = "你好, world";
assert_display(unescape(input).display_utf8(), Ok(expected));
}
#[test]
fn test_display_unicode_escape_bmp() {
let input = br"a\u00A2b";
let expected = "a¢b";
assert_display(unescape(input).display_utf8(), Ok(expected));
}
#[test]
fn test_display_mixed_content() {
let input = br#"Text with \n, \u00A2, and \uD83D\uDE0E emojis."#;
let expected = "Text with \n, ¢, and 😎 emojis.";
assert_display(unescape(input).display_utf8(), Ok(expected));
}
#[test]
fn test_display_starts_and_ends_with_escape() {
let input = br#"\u00A2hello\t"#;
let expected = "¢hello\t";
assert_display(unescape(input).display_utf8(), Ok(expected));
}
// Strict mode: every kind of invalid input is expected to fail formatting.
#[test]
fn test_display_err_invalid_escape() {
assert_display(unescape(br"hello \z world").display_utf8(), Err(()));
}
#[test]
fn test_display_err_incomplete_unicode() {
assert_display(unescape(br"\u123").display_utf8(), Err(()));
}
#[test]
fn test_display_err_invalid_hex_in_unicode() {
assert_display(unescape(br"\u123g").display_utf8(), Err(()));
}
#[test]
fn test_display_err_lone_high_surrogate() {
assert_display(unescape(br"\uD800").display_utf8(), Err(()));
}
#[test]
fn test_display_err_high_surrogate_not_followed_by_low() {
assert_display(unescape(br"\uD800\uABCD").display_utf8(), Err(()));
}
#[test]
fn test_display_err_invalid_source_utf8() {
assert_display(unescape(b"h\x80ello").display_utf8(), Err(()));
}
#[test]
fn strict_valid_multi_byte_split() {
let input = &[0xE2, 0x82, 0xAC];
let display = unescape(input).display_utf8();
assert_display(display, Ok("€"));
}
#[test]
fn strict_errors_on_invalid_start_byte() {
let input = &[0xFF, b'a'];
let display = unescape(input).display_utf8();
assert_display(display, Err(()));
}
// Lossy mode: invalid UTF-8 is expected to become U+FFFD, and an invalid
// escape is expected to end the output without an error.
#[test]
fn lossy_replaces_invalid_start_byte() {
let input = &[0xFF, b'a']; let display = unescape(input).display_utf8_lossy();
assert_display(display, Ok("\u{FFFD}a"));
}
#[test]
fn lossy_handles_trailing_incomplete_bytes() {
let input: &[u8] = &[0xE2, 0x82];
let display = unescape(input).display_utf8_lossy();
assert_display(display, Ok("\u{FFFD}"));
}
#[test]
fn test_display_lossy_invalid_source_utf8() {
let input = b"valid\xF0\x90\x80invalid";
let expected = "valid\u{FFFD}invalid";
assert_display(unescape(input).display_utf8_lossy(), Ok(expected));
}
#[test]
fn test_display_lossy_invalid_escape_truncates() {
let input = br"this is ok \n but this is not \z";
let expected = "this is ok \n";
assert_display(unescape(input).display_utf8_lossy(), Ok(expected));
}
#[test]
fn test_display_lossy_incomplete_unicode_truncates() {
let input = br"truncate after \n \uD83D";
let expected = "truncate after \n";
assert_display(unescape(input).display_utf8_lossy(), Ok(expected));
}
// Compile-time check that both iterator types are `Send`, `Sync` and unwind-safe.
#[test]
fn sync_regression() {
use core::panic::{RefUnwindSafe, UnwindSafe};
fn assert_send_sync<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
assert_send_sync::<Unescape<'_>>();
assert_send_sync::<Escape<'_>>();
}
}