#[cfg(feature = "alloc")]
use crate::DecodeUtf8Error;
use crate::token::{EscapeTokens, UnescapeTokens};
use crate::{UnescapeError, display_bytes_utf8};
use core::fmt;
use core::iter::FusedIterator;
use core::str;
#[cfg(feature = "alloc")]
use alloc::{borrow::Cow, string::String, vec::Vec};
/// Creates a lazy escaping iterator over the bytes of `s`.
///
/// Nothing is scanned here: the returned [`Escape`] only stores `s.as_bytes()`
/// and produces [`EscapedChunk`]s when it is iterated.
#[inline]
pub fn escape_str(s: &str) -> Escape<'_> {
    let bytes = s.as_bytes();
    Escape { bytes }
}
/// One step of escaped output: a run of text emitted verbatim, optionally
/// followed by the replacement text for a single escaped byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EscapedChunk<'a> {
    /// Text taken unchanged from the input.
    literal: &'a str,
    /// Replacement text that follows `literal`; `None` when the chunk carries no escape.
    escaped: Option<&'static str>,
}

impl<'a> EscapedChunk<'a> {
    /// Returns the verbatim part of this chunk.
    #[inline]
    pub const fn literal(&self) -> &'a str {
        self.literal
    }

    /// Returns the replacement text that follows the literal, if any.
    #[inline]
    pub const fn escaped(&self) -> Option<&'static str> {
        self.escaped
    }
}

impl<'a> fmt::Display for EscapedChunk<'a> {
    /// Writes the literal part and then, when present, the replacement text.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { literal, escaped } = *self;
        f.write_str(literal)?;
        match escaped {
            Some(replacement) => f.write_str(replacement),
            None => Ok(()),
        }
    }
}
/// Lazy iterator that walks a byte slice and yields [`EscapedChunk`]s.
///
/// Within this file it is constructed by [`escape_str`], which stores the
/// bytes of a `&str`.
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Escape<'a> {
    /// Input that has not been consumed yet.
    pub(crate) bytes: &'a [u8],
}

impl<'a> Iterator for Escape<'a> {
    type Item = EscapedChunk<'a>;

    /// Yields the next literal run together with the replacement for the byte
    /// that ended it, or `None` once the input is exhausted.
    ///
    /// # Panics
    /// Panics if `EscapeTokens::escape` has no replacement for the byte at
    /// which `EscapeTokens::split_at_escape` stopped.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.bytes.is_empty() {
            return None;
        }

        // SAFETY: NOTE(review): the contract of `split_at_escape` is not visible
        // from this file. Presumably it requires `self.bytes` to be valid UTF-8
        // (it hands back a `&str`); `escape_str` builds the iterator from a
        // `&str` - confirm that every other constructor upholds the same.
        let (literal, rest) = unsafe { EscapeTokens::split_at_escape(self.bytes) };

        let escaped = match rest.split_first() {
            // The literal ran to the end of the input: nothing left to escape.
            None => {
                self.bytes = rest;
                None
            }
            // `byte` is the one that needs escaping; resume right after it.
            Some((&byte, tail)) => {
                self.bytes = tail;
                let replacement = EscapeTokens::escape(byte)
                    .expect("find_escape_char found a byte not in ESCAPE_TABLE");
                Some(replacement)
            }
        };

        Some(EscapedChunk { literal, escaped })
    }
}

// Once `bytes` is empty `next` keeps returning `None`.
impl<'a> FusedIterator for Escape<'a> {}
impl<'a> fmt::Display for Escape<'a> {
    /// Writes every chunk of a clone of this iterator, in order; the iterator
    /// itself is not advanced.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.clone().try_for_each(|chunk| write!(f, "{chunk}"))
    }
}
impl fmt::Debug for Escape<'_> {
    /// Prints an opaque `Escape { .. }` without exposing the remaining bytes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut opaque = f.debug_struct("Escape");
        opaque.finish_non_exhaustive()
    }
}
impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Escape<'_> {
    /// Returns `true` when the escaped output of this iterator is byte-for-byte
    /// equal to `other`.
    ///
    /// The comparison streams over a clone of the iterator and never builds the
    /// escaped string.
    fn eq(&self, other: &B) -> bool {
        let expected: &[u8] = other.as_ref();

        // Peel each chunk off the front of `expected`; `None` means a mismatch.
        let leftover = self.clone().try_fold(expected, |rest, chunk| {
            let rest = rest.strip_prefix(chunk.literal.as_bytes())?;
            match chunk.escaped {
                Some(replacement) => rest.strip_prefix(replacement.as_bytes()),
                None => Some(rest),
            }
        });

        // Equal only if every chunk matched and nothing of `expected` is left over.
        matches!(leftover, Some(rest) if rest.is_empty())
    }
}
impl<'a, 'b> PartialEq<Escape<'a>> for Escape<'b> {
    /// Compares two iterators by wrapping their remaining bytes in the
    /// crate-level `Escape` type and delegating to its equality.
    fn eq(&self, other: &Escape<'a>) -> bool {
        let lhs = crate::Escape {
            inner: EscapeTokens { bytes: self.bytes },
        };
        let rhs = crate::Escape {
            inner: EscapeTokens { bytes: other.bytes },
        };
        lhs == rhs
    }
}
#[cfg(feature = "alloc")]
impl<'a> From<Escape<'a>> for Cow<'a, str> {
    /// Collects the escaped output, borrowing from the input when possible.
    ///
    /// * Empty input yields `Cow::Borrowed("")`.
    /// * If the first chunk carries no escape it is returned borrowed, without
    ///   allocating.
    /// * Otherwise the whole output is assembled into an owned `String`.
    fn from(mut iter: Escape<'a>) -> Self {
        let first = match iter.next() {
            Some(chunk) => chunk,
            None => return Cow::Borrowed(""),
        };

        // Matching on the option directly replaces the former
        // `is_none()` check followed by two `unwrap()` calls.
        match first.escaped {
            None => Cow::Borrowed(first.literal),
            Some(escaped) => {
                // Sizing hint: what has been produced so far plus the unread input.
                let mut s = String::with_capacity(
                    first.literal.len() + escaped.len() + iter.bytes.len(),
                );
                s.push_str(first.literal);
                s.push_str(escaped);
                s.extend(iter);
                Cow::Owned(s)
            }
        }
    }
}
/// Creates a lazy unescaping iterator over anything viewable as a byte slice.
///
/// Nothing is parsed here; the returned [`Unescape`] only stores
/// `input.as_ref()` and does its work as it is iterated.
#[inline]
pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
    let bytes = input.as_ref();
    Unescape { bytes }
}
/// Like [`unescape`], but first removes one pair of surrounding `"` bytes.
///
/// The quotes are stripped only when the input both starts and ends with `"`
/// and is at least two bytes long (so a lone `"` is left untouched). Any other
/// input is passed to [`unescape`] unchanged.
#[inline]
pub fn unescape_quoted(bytes: &[u8]) -> Unescape<'_> {
    let inner = bytes
        .strip_prefix(b"\"")
        // Needs a second, distinct quote at the end; fails for a single `"`.
        .and_then(|rest| rest.strip_suffix(b"\""))
        .unwrap_or(bytes);
    unescape(inner)
}
/// One step of unescaped output: a run of bytes taken verbatim from the input,
/// optionally followed by a single character decoded from an escape sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnescapedChunk<'a> {
    /// Bytes copied unchanged from the input.
    pub(crate) literal: &'a [u8],
    /// Character that follows `literal`; `None` when the chunk carries no escape.
    pub(crate) unescaped: Option<char>,
}

impl<'a> UnescapedChunk<'a> {
    /// Returns the verbatim bytes of this chunk.
    #[inline]
    pub const fn literal(&self) -> &'a [u8] {
        self.literal
    }

    /// Returns the decoded character that follows the literal, if any.
    #[inline]
    pub const fn unescaped(&self) -> Option<char> {
        self.unescaped
    }

    /// Returns a `Display` adapter for this chunk with the `lossy` flag off.
    ///
    /// NOTE(review): the flag is forwarded to `display_bytes_utf8`; presumably
    /// "off" means invalid UTF-8 in the literal is reported as a formatting
    /// error - confirm against that helper.
    pub fn display_utf8(&self) -> DisplayUnescapedChunk<'_> {
        let lossy = false;
        DisplayUnescapedChunk { chunk: self, lossy }
    }

    /// Returns a `Display` adapter for this chunk with the `lossy` flag on.
    ///
    /// NOTE(review): presumably invalid UTF-8 is then replaced rather than
    /// rejected - confirm against `display_bytes_utf8`.
    pub fn display_utf8_lossy(&self) -> DisplayUnescapedChunk<'_> {
        let lossy = true;
        DisplayUnescapedChunk { chunk: self, lossy }
    }
}

/// `Display` adapter returned by [`UnescapedChunk::display_utf8`] and
/// [`UnescapedChunk::display_utf8_lossy`].
pub struct DisplayUnescapedChunk<'a> {
    /// Chunk being displayed.
    chunk: &'a UnescapedChunk<'a>,
    /// Flag forwarded to the byte-display helper.
    lossy: bool,
}
impl<'a> fmt::Display for DisplayUnescapedChunk<'a> {
    /// Writes the chunk's literal bytes through `display_bytes_utf8` (passing
    /// the `lossy` flag along) and then the decoded character, if there is one.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Brings `write_char` into scope for the formatter.
        use fmt::Write as _;

        display_bytes_utf8(self.chunk.literal, f, self.lossy)?;
        match self.chunk.unescaped {
            Some(c) => f.write_char(c),
            None => Ok(()),
        }
    }
}
/// Lazy iterator over a byte slice that yields
/// `Result<UnescapedChunk, UnescapeError>` items.
///
/// Within this file it is constructed by [`unescape`] and [`unescape_quoted`].
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Unescape<'a> {
    /// Input that has not been consumed yet.
    pub(crate) bytes: &'a [u8],
}

impl<'a> Unescape<'a> {
    /// Unescapes the whole input into bytes and validates them as UTF-8.
    ///
    /// The result borrows from the input whenever the byte conversion could
    /// borrow; otherwise it is an owned `String`.
    ///
    /// # Errors
    /// * `DecodeUtf8Error::Unescape` wraps an error raised while unescaping.
    /// * `DecodeUtf8Error::Utf8` is returned when the unescaped bytes are not
    ///   valid UTF-8.
    #[cfg(feature = "alloc")]
    pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
        let raw: Cow<'a, [u8]> = self.try_into().map_err(DecodeUtf8Error::Unescape)?;
        match raw {
            Cow::Borrowed(slice) => match str::from_utf8(slice) {
                Ok(text) => Ok(Cow::Borrowed(text)),
                Err(err) => Err(DecodeUtf8Error::Utf8(err)),
            },
            Cow::Owned(buffer) => match String::from_utf8(buffer) {
                Ok(text) => Ok(Cow::Owned(text)),
                Err(err) => Err(DecodeUtf8Error::Utf8(err.utf8_error())),
            },
        }
    }

    /// Unescapes the whole input into bytes and hands them to
    /// `crate::decode_utf8_lossy`.
    ///
    /// # Errors
    /// Returns the `UnescapeError` raised while unescaping.
    #[cfg(feature = "alloc")]
    pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
        Ok(crate::decode_utf8_lossy(self.try_into()?))
    }

    /// Wraps the iterator in a `Display` adapter with the `lossy` flag off.
    pub fn display_utf8(self) -> DisplayUnescape<'a> {
        let lossy = false;
        DisplayUnescape { inner: self, lossy }
    }

    /// Wraps the iterator in a `Display` adapter with the `lossy` flag on.
    pub fn display_utf8_lossy(self) -> DisplayUnescape<'a> {
        let lossy = true;
        DisplayUnescape { inner: self, lossy }
    }
}
impl<'a> Iterator for Unescape<'a> {
    type Item = Result<UnescapedChunk<'a>, UnescapeError>;

    /// Yields the next literal run together with the character decoded from the
    /// escape sequence that ended it, or `None` once the input is exhausted.
    ///
    /// When decoding the escape fails, only the error is yielded: the literal
    /// preceding it is not returned, and `self.bytes` is left untouched.
    ///
    /// NOTE(review): because the position is not advanced on error, calling
    /// `next` again re-scans the same input; callers are expected to stop at
    /// the first `Err`.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.bytes.is_empty() {
            return None;
        }

        let (literal, rest) = UnescapeTokens::split_at_escape(self.bytes);

        // The literal ran to the end of the input: final chunk, no escape.
        if rest.is_empty() {
            self.bytes = rest;
            return Some(Ok(UnescapedChunk {
                literal,
                unescaped: None,
            }));
        }

        // Skip the byte that introduced the escape and let the tokenizer
        // consume the remainder of the sequence from `cursor`.
        let mut cursor = &rest[1..];
        match UnescapeTokens::handle_escape(&mut cursor) {
            Ok(decoded) => {
                self.bytes = cursor;
                Some(Ok(UnescapedChunk {
                    literal,
                    unescaped: Some(decoded),
                }))
            }
            Err(err) => Some(Err(err)),
        }
    }
}

// Once `bytes` is empty `next` keeps returning `None`.
impl<'a> FusedIterator for Unescape<'a> {}
impl fmt::Debug for Unescape<'_> {
    /// Prints an opaque `Unescape { .. }` without exposing the remaining bytes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut opaque = f.debug_struct("Unescape");
        opaque.finish_non_exhaustive()
    }
}
impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Unescape<'_> {
    /// Returns `true` when the unescaped output of this iterator is
    /// byte-for-byte equal to `other`.
    ///
    /// The comparison streams over a clone of the iterator. Any unescape error
    /// makes the comparison `false`.
    fn eq(&self, other: &B) -> bool {
        let mut remaining: &[u8] = other.as_ref();
        // Scratch space for the UTF-8 encoding of one decoded character.
        let mut utf8 = [0u8; 4];

        for item in self.clone() {
            let chunk = match item {
                Ok(chunk) => chunk,
                Err(_) => return false,
            };

            remaining = match remaining.strip_prefix(chunk.literal) {
                Some(tail) => tail,
                None => return false,
            };

            if let Some(c) = chunk.unescaped {
                let encoded = c.encode_utf8(&mut utf8);
                remaining = match remaining.strip_prefix(encoded.as_bytes()) {
                    Some(tail) => tail,
                    None => return false,
                };
            }
        }

        // Equal only if nothing of `other` is left unmatched.
        remaining.is_empty()
    }
}
impl<B: AsRef<[u8]>> PartialEq<Unescape<'_>> for Result<B, UnescapeError> {
    /// Compares an expected outcome with what `unescape` would produce.
    ///
    /// * `Ok(bytes)` matches when the unescaped output equals `bytes`.
    /// * `Err(e)` matches when the first error yielded by a clone of the
    ///   iterator equals `e`; an iterator that yields no error never matches.
    fn eq(&self, unescape: &Unescape<'_>) -> bool {
        let expected_error = match self {
            Ok(expected_bytes) => return unescape == expected_bytes,
            Err(expected_error) => expected_error,
        };

        unescape
            .clone()
            .find_map(Result::err)
            .map_or(false, |actual_error| actual_error == *expected_error)
    }
}
impl<'a, 'b> PartialEq<Unescape<'a>> for Unescape<'b> {
    /// Compares two iterators by feeding their remaining bytes to the
    /// crate-level `unescape` and delegating to the equality of its result.
    fn eq(&self, other: &Unescape<'a>) -> bool {
        let lhs = crate::unescape(self.bytes);
        let rhs = crate::unescape(other.bytes);
        lhs == rhs
    }
}
#[cfg(feature = "alloc")]
impl<'a> TryFrom<Unescape<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Collects the unescaped output, borrowing from the input when possible.
    ///
    /// * Empty input yields `Cow::Borrowed(b"")`.
    /// * If the first chunk carries no escape it is returned borrowed, without
    ///   allocating.
    /// * Otherwise the whole output is assembled into an owned `Vec<u8>`.
    ///
    /// # Errors
    /// Returns the first `UnescapeError` yielded by the iterator.
    fn try_from(mut value: Unescape<'a>) -> Result<Self, Self::Error> {
        use crate::token::append_char;

        let first = match value.next() {
            None => return Ok(Cow::Borrowed(b"")),
            Some(item) => item?,
        };
        let first_char = match first.unescaped {
            None => return Ok(Cow::Borrowed(first.literal)),
            Some(c) => c,
        };

        // Sizing hint: the literal about to be copied, the unread input, and a
        // little slack. The literal was previously left out, which forced a
        // reallocation whenever it was longer than the slack.
        let mut buf = Vec::with_capacity(first.literal.len() + value.bytes.len() + 16);
        buf.extend_from_slice(first.literal);
        append_char(&mut buf, first_char);

        for item in value {
            let chunk = item?;
            buf.extend_from_slice(chunk.literal);
            if let Some(c) = chunk.unescaped {
                append_char(&mut buf, c);
            }
        }
        Ok(Cow::Owned(buf))
    }
}
/// `Display` adapter returned by `Unescape::display_utf8` and
/// `Unescape::display_utf8_lossy`.
pub struct DisplayUnescape<'a> {
    /// Iterator whose output is displayed; it is cloned for every `fmt` call.
    inner: Unescape<'a>,
    /// Flag forwarded to each chunk's display adapter.
    lossy: bool,
}

impl<'a> fmt::Display for DisplayUnescape<'a> {
    /// Writes every chunk in order. An unescape error aborts formatting with
    /// `fmt::Error`; chunks written before it stay written.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for item in self.inner.clone() {
            let chunk = item.map_err(|_| fmt::Error)?;
            let shown = DisplayUnescapedChunk {
                chunk: &chunk,
                lossy: self.lossy,
            };
            write!(f, "{}", shown)?;
        }
        Ok(())
    }
}
/// Collection support so chunk iterators can be `collect`ed or `extend`ed into
/// `String` / `Vec<u8>` buffers.
#[cfg(feature = "alloc")]
mod iter_traits {
    use super::{EscapedChunk, UnescapedChunk};
    use crate::token::append_char;
    use alloc::string::String;
    use alloc::vec::Vec;

    impl<'a> FromIterator<EscapedChunk<'a>> for String {
        /// Concatenates all chunks into a new `String`.
        #[inline]
        fn from_iter<I: IntoIterator<Item = EscapedChunk<'a>>>(iter: I) -> String {
            let mut out = String::new();
            out.extend(iter);
            out
        }
    }

    impl<'a> Extend<EscapedChunk<'a>> for String {
        /// Appends each chunk's literal text followed by its replacement, if any.
        #[inline]
        fn extend<I: IntoIterator<Item = EscapedChunk<'a>>>(&mut self, iter: I) {
            for chunk in iter {
                self.push_str(chunk.literal);
                if let Some(replacement) = chunk.escaped {
                    self.push_str(replacement);
                }
            }
        }
    }

    impl<'a> FromIterator<UnescapedChunk<'a>> for Vec<u8> {
        /// Concatenates all chunks into a new byte buffer.
        #[inline]
        fn from_iter<I: IntoIterator<Item = UnescapedChunk<'a>>>(iter: I) -> Vec<u8> {
            let mut out = Vec::new();
            out.extend(iter);
            out
        }
    }

    impl<'a> Extend<UnescapedChunk<'a>> for Vec<u8> {
        /// Appends each chunk's literal bytes followed by its decoded
        /// character, if any (written via `append_char`).
        #[inline]
        fn extend<I: IntoIterator<Item = UnescapedChunk<'a>>>(&mut self, iter: I) {
            for chunk in iter {
                self.extend_from_slice(chunk.literal);
                if let Some(c) = chunk.unescaped {
                    append_char(self, c);
                }
            }
        }
    }
}
// Unit tests for the chunk-level escape / unescape iterators in this module.
#[cfg(test)]
mod tests {
use super::*;
// Test-only constructor so expected chunks can be spelled out directly.
impl<'a> EscapedChunk<'a> {
const fn new(literal: &'a str, escaped: Option<&'static str>) -> Self {
Self { literal, escaped }
}
}
// Test-only constructor so expected chunks can be spelled out directly.
impl<'a> UnescapedChunk<'a> {
const fn new(literal: &'a [u8], unescaped: Option<char>) -> Self {
Self { literal, unescaped }
}
}
// Each escaped byte ends a chunk; the trailing literal arrives with no escape.
#[test]
fn escape_chunks() {
let mut it = escape_str("a\nb\"c");
assert_eq!(
it.next(),
Some(EscapedChunk::new("a", Some(r#"\n"#))),
"Chunk 1"
);
assert_eq!(
it.next(),
Some(EscapedChunk::new("b", Some(r#"\""#))),
"Chunk 2"
);
assert_eq!(it.next(), Some(EscapedChunk::new("c", None)), "Chunk 3");
assert_eq!(it.next(), None, "End of iterator");
}
// Back-to-back escapes produce a chunk with an empty literal.
#[test]
fn unescape_chunks() {
let mut it = unescape(br"xy\t\u0020z");
assert_eq!(
it.next().unwrap().unwrap(),
UnescapedChunk::new(b"xy", Some('\t')),
"Chunk 1"
);
assert_eq!(
it.next().unwrap().unwrap(),
UnescapedChunk::new(b"", Some(' ')),
"Chunk 2"
);
assert_eq!(
it.next().unwrap().unwrap(),
UnescapedChunk::new(b"z", None),
"Chunk 3"
);
assert_eq!(it.next(), None, "End of iterator");
}
// Collecting the chunks into a `String` gives the fully escaped text.
#[test]
fn test_escape_against_collected_string() {
assert_eq!(
escape_str("Hello, world!").collect::<String>(),
"Hello, world!"
);
assert_eq!(escape_str("a\"b").collect::<String>(), r#"a\"b"#);
assert_eq!(escape_str("\0").collect::<String>(), r#"\u0000"#);
assert_eq!(
escape_str("path/to/file").collect::<String>(),
r#"path/to/file"#
);
// No assertion on the output: this only checks that iterating over
// multi-byte input completes without panicking.
escape_str(r#"Unicode test: éàçüö. Emoji: 😀. More symbols: ❤️✅."#).for_each(|_| {});
}
// `decode_utf8` yields the unescaped text, including a surrogate pair that
// decodes to a single character.
#[test]
fn test_unescape_against_collected_string() {
assert_eq!(
unescape(br"Hello, world!").decode_utf8().unwrap(),
"Hello, world!"
);
assert_eq!(unescape(br"a\nb").decode_utf8().unwrap(), "a\nb");
assert_eq!(unescape(br"\uD83D\uDE00").decode_utf8().unwrap(), "😀");
}
// An invalid escape is reported by the very first item: the literal that
// precedes it is not yielded as a separate chunk.
#[test]
fn unescape_error_propagation() {
let mut it = unescape(br"valid\k");
let first_chunk = it.next().unwrap();
assert!(matches!(first_chunk, Err(UnescapeError { .. })));
}
// Compile-time check that the public types keep their auto traits.
#[test]
fn sync_regression() {
use core::panic::{RefUnwindSafe, UnwindSafe};
fn assert_send_sync<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
assert_send_sync::<Unescape<'_>>();
assert_send_sync::<Escape<'_>>();
assert_send_sync::<UnescapedChunk<'_>>();
assert_send_sync::<EscapedChunk<'_>>();
}
}