use core::convert::Infallible;
#[cfg(feature = "std")]
use std::vec::Vec;
use crate::{
UnescapeError, UnescapeErrorKind,
token::{UnescapeTokens, UnescapedToken, unescape},
};
/// Unescapes a chunked byte stream, handing every resulting token to a
/// caller-supplied expression.
///
/// The invocation consists of two labelled arms, each followed by a comma:
///
/// * `<ident>: <expr>` — the *read* expression. It is evaluated once per loop
///   iteration and must produce `Some(chunk)` for the next chunk or `None`
///   to stop. The chunk is passed as-is to
///   `UnescapeStream::try_unescape_next`, which takes a reference to a value
///   implementing `AsRef<[u8]>`.
/// * `<ident>: |token| <expr>` — the *write* expression. It is evaluated once
///   per token with `token` bound to a `token::UnescapedToken`.
///
/// The expansion is a list of statements, not a closure, so both expressions
/// are spliced directly into the calling function body. The expansion applies
/// `?` to the result of `try_unescape_next`, to every token result and to the
/// final `finish()` call, so it has to be used inside a function whose return
/// type can absorb an `UnescapeError` through `?`.
#[macro_export]
macro_rules! unescape_stream_into {
(
$Read:ident: $read:expr,
$Write:ident: |$token:ident| $write:expr,
) => {
// Stub function named after the first label. It is never called by the
// expansion (hence `dead_code`) and its body only panics; presumably it
// exists to give the label a definition and to hint at the expected chunk
// type — NOTE(review): confirm intent.
#[allow(dead_code)]
#[allow(non_snake_case)]
fn $Read() -> Option<impl AsRef<[u8]>> {
unimplemented!();
#[allow(unreachable_code)]
Some(b"")
}
// Stub function named after the second label, taking the token type the
// write expression receives. Also never called by the expansion.
#[allow(dead_code)]
#[allow(non_snake_case)]
#[allow(unused_variables)]
fn $Write($token: $crate::token::UnescapedToken<'_>) {
unimplemented!();
}
let mut unescaper = $crate::stream::UnescapeStream::new();
loop {
// Pull the next chunk; `None` ends the loop.
if let Some(part) = $read {
let (boundary_char, rest_of_part) = unescaper.try_unescape_next(part)?;
// An escape that started in the previous chunk and was completed by this
// one is reported first, as a single unescaped character.
if let Some(boundary_char) = boundary_char {
let $token = $crate::token::UnescapedToken::Unescaped(boundary_char);
$write
}
// Then every token of the remainder of the chunk, stopping at the first
// error.
for result in rest_of_part {
let unescaped_part = result?;
let $token = unescaped_part;
$write
}
} else {
break;
}
}
// Reports an error if the input stopped in the middle of an escape sequence.
unescaper.finish()?;
};
}
/// State carried from one chunk of a stream to the next while unescaping.
///
/// The only thing remembered between chunks is the start of an escape
/// sequence that was cut off by a chunk boundary; it is completed with bytes
/// from the beginning of the following chunk.
#[derive(Debug, Clone)]
#[must_use = "UnescapeStream does nothing unless consumed"]
pub struct UnescapeStream {
/// Holds the bytes of the pending, incomplete escape sequence. Twelve bytes
/// is the length of a full surrogate-pair escape such as `\uD83D\uDE00`
/// (presumably the longest sequence that ever needs stitching — confirm).
stitch_buf: [u8; 12],
/// Number of meaningful bytes at the front of `stitch_buf`; `0` means that
/// no escape sequence is pending.
stitch_len: u8,
}
impl Default for UnescapeStream {
#[inline]
fn default() -> Self {
Self::new()
}
}
impl UnescapeStream {
#[inline]
pub fn new() -> Self {
Self {
stitch_buf: [0; 12],
stitch_len: 0,
}
}
#[inline]
pub fn try_unescape_next<'a, 'b, I: AsRef<[u8]> + ?Sized>(
&'a mut self,
next_part: &'b I,
) -> Result<(Option<char>, UnescapeNext<'a, 'b>), UnescapeError> {
let (boundary_result, new) = self.unescape_next(next_part);
let boundary_char = boundary_result.transpose()?;
Ok((boundary_char, new))
}
pub fn unescape_next<'a, 'b, I: AsRef<[u8]> + ?Sized>(
&'a mut self,
next_part: &'b I,
) -> (Option<Result<char, UnescapeError>>, UnescapeNext<'a, 'b>) {
let mut next_part_slice = next_part.as_ref();
let boundary_char = if self.stitch_len > 0 {
let old_stitch_len = self.stitch_len as usize;
let needed = self.stitch_buf.len() - old_stitch_len;
let to_copy = needed.min(next_part_slice.len());
self.stitch_buf[old_stitch_len..old_stitch_len + to_copy]
.copy_from_slice(&next_part_slice[..to_copy]);
self.stitch_len += to_copy as u8;
let mut unescaper = unescape(&self.stitch_buf[..self.stitch_len as usize]);
let next = unescaper.next();
match next {
Some(Ok(token)) => {
let total_consumed = (self.stitch_len as usize) - unescaper.remnant().len();
let consumed_from_next = total_consumed - old_stitch_len;
next_part_slice = &next_part_slice[consumed_from_next..];
let unescaped_char = match token {
UnescapedToken::Unescaped(c) => c,
_ => unreachable!("unescaper should produce a char from an escape"),
};
self.stitch_len = 0;
Some(Ok(unescaped_char))
}
Some(Err(err)) => {
if err.kind == UnescapeErrorKind::UnexpectedEof {
next_part_slice = &next_part_slice[to_copy..]; None } else {
self.stitch_len = old_stitch_len as u8;
Some(Err(err))
}
}
None => {
unreachable!();
}
}
} else {
None
};
let iterator = UnescapeNext {
stream: self,
inner: UnescapeTokens::new(next_part_slice),
};
(boundary_char, iterator)
}
pub fn finish(self) -> Result<(), UnescapeError> {
if self.stitch_len > 0 {
let buf = &self.stitch_buf[..self.stitch_len as usize];
if let Some(Err(e)) = unescape(buf).next() {
debug_assert_eq!(
e,
UnescapeError {
kind: UnescapeErrorKind::UnexpectedEof,
offset: self.stitch_len
}
);
return Err(e);
}
}
Ok(())
}
pub fn clear(&mut self) {
self.stitch_len = 0;
}
#[inline]
#[deprecated(
since = "0.3.1",
note = "This sync-only function is superseded by the runtime-agnostic `unescape_stream_into!` macro."
)]
pub fn unescape_from_fn<Src, Dst, SrcError, DstError, B>(
self,
src: Src,
dst: Dst,
) -> Result<(), UnescapeFnError<SrcError, DstError>>
where
Src: FnMut() -> Option<Result<B, SrcError>>,
Dst: FnMut(UnescapedToken<'_>) -> Result<(), DstError>,
B: AsRef<[u8]>,
{
#[allow(deprecated)]
self.unescape_from_source(
FnMutChunkSource {
closure: src,
_phantom: core::marker::PhantomData,
},
dst,
)
}
#[inline]
#[deprecated(
since = "0.3.1",
note = "This sync-only function is superseded by the runtime-agnostic `unescape_stream_into!` macro."
)]
#[allow(deprecated)]
pub fn unescape_from_source<Src, Dst, SrcError, DstError>(
mut self,
mut src: Src,
mut dst: Dst,
) -> Result<(), UnescapeFnError<SrcError, DstError>>
where
Src: ChunkSource<Error = SrcError>,
Dst: FnMut(UnescapedToken<'_>) -> Result<(), DstError>,
{
while let Some(next) = src.next_chunk() {
let next = next.map_err(UnescapeFnError::Src)?;
let (boundary, next) = self
.try_unescape_next(next.as_ref())
.map_err(UnescapeFnError::Unescape)?;
if let Some(ch) = boundary {
dst(UnescapedToken::Unescaped(ch)).map_err(UnescapeFnError::Dst)?;
}
for token in next {
let token = token.map_err(UnescapeFnError::Unescape)?;
dst(token).map_err(UnescapeFnError::Dst)?;
}
}
self.finish().map_err(UnescapeFnError::Unescape)
}
}
/// Error returned by `UnescapeStream::unescape_from_fn` and
/// `UnescapeStream::unescape_from_source`, recording which of the three
/// parties involved failed.
#[derive(Clone, Debug)]
pub enum UnescapeFnError<Src, Dst> {
/// The input itself could not be unescaped.
Unescape(UnescapeError),
/// The chunk source reported an error while producing a chunk.
Src(Src),
/// The destination callback reported an error while receiving a token.
Dst(Dst),
}
impl<Src, Dst: core::fmt::Display> core::fmt::Display for UnescapeFnError<Src, Dst>
where
Src: core::fmt::Display,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
UnescapeFnError::Unescape(e) => write!(f, "unescape error: {e}"),
UnescapeFnError::Src(e) => write!(f, "source error: {e}"),
UnescapeFnError::Dst(e) => write!(f, "destination error: {e}"),
}
}
}
/// Exposes the wrapped error, whichever variant holds it, as the source.
#[cfg(feature = "std")]
impl<Src, Dst> std::error::Error for UnescapeFnError<Src, Dst>
where
    Src: std::error::Error + 'static,
    Dst: std::error::Error + 'static,
{
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            Self::Unescape(e) => e,
            Self::Src(e) => e,
            Self::Dst(e) => e,
        };
        Some(cause)
    }
}
/// When neither the source nor the destination can fail, the only error left
/// is the unescape error, so the wrapper can be removed.
impl From<UnescapeFnError<Infallible, Infallible>> for UnescapeError {
    fn from(value: UnescapeFnError<Infallible, Infallible>) -> Self {
        match value {
            UnescapeFnError::Unescape(inner) => inner,
            // `Infallible` has no values, so these variants cannot exist.
            UnescapeFnError::Src(never) | UnescapeFnError::Dst(never) => match never {},
        }
    }
}
/// Iterator over the tokens of a single chunk, as returned by
/// `UnescapeStream::unescape_next` and `UnescapeStream::try_unescape_next`.
///
/// If the chunk ends in the middle of an escape sequence, iteration ends
/// there and the unfinished bytes are stored in the stream so that the next
/// chunk can complete them.
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Debug)]
pub struct UnescapeNext<'a, 'b> {
/// The stream this chunk belongs to; receives the unfinished escape bytes.
stream: &'a mut UnescapeStream,
/// Token iterator over the part of the chunk that is handled here.
inner: UnescapeTokens<'b>,
}
impl<'a, 'b> Iterator for UnescapeNext<'a, 'b> {
    type Item = Result<UnescapedToken<'b>, UnescapeError>;

    /// Yields the next token of the chunk.
    ///
    /// An "unexpected end of input" error is not passed on: the unconsumed
    /// tail of the chunk is saved in the stream as the pending escape and the
    /// iteration ends with `None`. Every other item is forwarded unchanged.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.inner.next()?;
        let cut_short = matches!(&item, Err(e) if e.kind == UnescapeErrorKind::UnexpectedEof);
        if !cut_short {
            return Some(item);
        }

        // The chunk stopped inside an escape: remember what is left of it.
        let remnant = self.inner.remnant();
        debug_assert!(!remnant.is_empty() && remnant[0] == b'\\', "{remnant:?}");
        debug_assert!(remnant.len() < self.stream.stitch_buf.len(), "{remnant:?}");
        let pending = remnant.len();
        self.stream.stitch_buf[..pending].copy_from_slice(remnant);
        self.stream.stitch_len = pending as u8;
        None
    }
}
/// A synchronous producer of byte chunks, consumed by
/// `UnescapeStream::unescape_from_source`.
#[deprecated(
since = "0.3.1",
note = "This sync-only trait is superseded by the runtime-agnostic `unescape_stream_into!` macro."
)]
pub trait ChunkSource {
/// Error reported when a chunk cannot be produced.
type Error;
/// The chunk type. It may borrow from the source for the duration `'a` of
/// the `next_chunk` call that produced it.
type Chunk<'a>: AsRef<[u8]> + 'a
where
Self: 'a;
/// Produces the next chunk: `Some(Ok(chunk))` for data, `Some(Err(_))` when
/// fetching failed, `None` once there is nothing more to read.
fn next_chunk<'a>(&'a mut self) -> Option<Result<Self::Chunk<'a>, Self::Error>>;
}
/// A mutable reference to a chunk source is itself a chunk source, so a
/// source can be lent out instead of being moved.
#[allow(deprecated)]
impl<T> ChunkSource for &mut T
where
    T: ChunkSource,
{
    type Error = T::Error;
    type Chunk<'a>
        = T::Chunk<'a>
    where
        Self: 'a;

    /// Forwards to the referenced source.
    #[inline]
    fn next_chunk<'a>(&'a mut self) -> Option<Result<Self::Chunk<'a>, Self::Error>> {
        <T as ChunkSource>::next_chunk(&mut **self)
    }
}
/// Chunk source that pairs a reader with a scratch buffer; its `ChunkSource`
/// implementation fills the buffer from the reader and hands out the filled
/// part as the chunk.
#[cfg(feature = "std")]
#[deprecated(
since = "0.3.1",
note = "This sync-only struct is superseded by the runtime-agnostic `unescape_stream_into!` macro."
)]
pub struct ReadChunkSource<R, B = Vec<u8>> {
/// Where the bytes come from.
reader: R,
/// Scratch space that every read writes into; its length limits the size of
/// a chunk.
buffer: B,
}
#[cfg(feature = "std")]
#[allow(deprecated)]
impl<R, B> ReadChunkSource<R, B> {
    /// Combines `reader` with a caller-provided scratch `buffer`.
    pub fn new(reader: R, buffer: B) -> Self {
        ReadChunkSource { reader, buffer }
    }

    /// Combines `reader` with a freshly allocated, zero-filled buffer of
    /// `size` bytes.
    pub fn with_buffer_size(reader: R, size: usize) -> ReadChunkSource<R, Vec<u8>> {
        let buffer = std::vec![0u8; size];
        ReadChunkSource { reader, buffer }
    }
}
// Gated on `std` like `ReadChunkSource` itself: the impl names both that
// struct and `std::io::Read`, neither of which exists without the feature.
#[cfg(feature = "std")]
#[allow(deprecated)]
impl<R, B> ChunkSource for ReadChunkSource<R, B>
where
    R: std::io::Read,
    B: AsMut<[u8]>,
{
    type Error = std::io::Error;
    type Chunk<'a>
        = &'a [u8]
    where
        Self: 'a;

    /// Performs one `read` into the scratch buffer.
    ///
    /// Returns the filled prefix of the buffer as the chunk, `None` when the
    /// read reports zero bytes, or the I/O error as-is.
    #[inline]
    fn next_chunk<'a>(&'a mut self) -> Option<Result<Self::Chunk<'a>, Self::Error>> {
        let buffer = self.buffer.as_mut();
        match self.reader.read(buffer) {
            // A zero-length read is treated as the end of the input.
            Ok(0) => None,
            Ok(n) => Some(Ok(&buffer[..n])),
            Err(e) => Some(Err(e)),
        }
    }
}
/// Chunk source backed by a closure: every call of the closure yields the
/// next chunk, an error, or `None` when the input is exhausted.
pub struct FnMutChunkSource<'s, F, B, E>
where
F: FnMut() -> Option<Result<B, E>>,
B: AsRef<[u8]> + 's,
{
/// The closure that produces the chunks.
closure: F,
/// Zero-sized marker that makes the type carry the lifetime `'s` which the
/// chunk type `B` is required to outlive.
_phantom: core::marker::PhantomData<&'s ()>,
}
impl<'s, F, B, E> FnMutChunkSource<'s, F, B, E>
where
F: FnMut() -> Option<Result<B, E>>,
B: AsRef<[u8]> + 's,
{
pub fn new(closure: F) -> Self {
FnMutChunkSource {
closure,
_phantom: core::marker::PhantomData,
}
}
}
#[allow(deprecated)]
impl<'s, F, B, E> ChunkSource for FnMutChunkSource<'s, F, B, E>
where
    F: FnMut() -> Option<Result<B, E>>,
    B: AsRef<[u8]> + 's,
{
    type Error = E;
    type Chunk<'a>
        = B
    where
        Self: 'a;

    /// Calls the wrapped closure once and returns its result unchanged.
    #[inline(always)]
    fn next_chunk<'a>(&'a mut self) -> Option<Result<Self::Chunk<'a>, Self::Error>> {
        let produce = &mut self.closure;
        produce()
    }
}
/// Checks the properties of the plain (non-streaming) unescaper that the
/// streaming code in this file relies on.
#[cfg(test)]
mod assumptions_tests {
    use super::*;

    /// Runs the unescaper over `input` and returns the error produced by its
    /// first step.
    fn first_error(input: &[u8]) -> UnescapeError {
        unescape(input).next().unwrap().unwrap_err()
    }

    /// The "unexpected end of input" error located at `offset`.
    fn eof_at(offset: u8) -> UnescapeError {
        UnescapeError {
            kind: UnescapeErrorKind::UnexpectedEof,
            offset,
        }
    }

    /// A high surrogate followed by nothing, or by a lone backslash, is
    /// reported as missing input rather than as a lone surrogate.
    #[test]
    fn insufficient_data_is_not_lone_surrogate() {
        let cases = [(&br"\uD83D"[..], 6), (&br"\uD83D\"[..], 7)];
        for (input, offset) in cases {
            assert_eq!(first_error(input), eof_at(offset));
        }
    }

    /// A high surrogate followed by an ordinary byte is a lone surrogate, not
    /// missing input.
    #[test]
    fn lone_surrogate_is_not_eof() {
        let expected = UnescapeError {
            kind: UnescapeErrorKind::LoneSurrogate(crate::LoneSurrogateError {
                surrogate: 0xD83D,
            }),
            offset: 6,
        };
        assert_eq!(first_error(br"\uD83Da"), expected);
    }

    /// After an error the unescaper has not consumed any of the input.
    #[test]
    fn unescape_does_not_commit_on_error() {
        let err_input = br"\uD83D";
        let mut unescaper = unescape(err_input);
        let reported = unescaper.next().unwrap().unwrap_err();
        assert_eq!(reported, eof_at(6));
        assert_eq!(unescaper.remnant(), err_input);
    }

    /// Asking again after an error yields the same error again.
    #[test]
    fn unescape_keeps_erroring() {
        let mut unescaper = unescape(br"\z");
        let first = unescaper.next().unwrap().unwrap_err();
        let second = unescaper.next().unwrap().unwrap_err();
        assert_eq!(first, second)
    }
}
/// Behavior tests for the streaming unescaper: chunk boundaries at every
/// position inside an escape, error reporting, recovery, the deprecated
/// callback API and the `unescape_stream_into!` macro.
#[cfg(test)]
mod tests {
    use super::*;
    use std::{str, string::String, vec::Vec};

    /// Feeds `parts` through a fresh stream, one chunk at a time, and returns
    /// the concatenated output.
    fn run_stream_test<I, S>(parts: I) -> Result<String, UnescapeError>
    where
        I: IntoIterator<Item = S>,
        S: AsRef<[u8]>,
    {
        let mut chunks = parts.into_iter();
        let mut collected = String::new();
        #[allow(deprecated)]
        UnescapeStream::new().unescape_from_fn::<_, _, Infallible, Infallible, _>(
            || chunks.next().map(Ok),
            |token| {
                match token {
                    UnescapedToken::Literal(bytes) => {
                        collected.push_str(str::from_utf8(bytes).unwrap())
                    }
                    UnescapedToken::Unescaped(ch) => collected.push(ch),
                }
                Ok(())
            },
        )?;
        Ok(collected)
    }

    /// Consumes `tokens`, panicking on the first error.
    fn drain_ok(tokens: UnescapeNext<'_, '_>) {
        for token in tokens {
            token.unwrap();
        }
    }

    #[test]
    fn test_single_chunk_no_escapes() {
        let unescaped = run_stream_test([br"hello world"]).unwrap();
        assert_eq!(unescaped, "hello world");
    }

    #[test]
    fn test_single_chunk_with_escapes() {
        let unescaped = run_stream_test([br#"hello \"world\t\n\\"#]).unwrap();
        assert_eq!(unescaped, "hello \"world\t\n\\");
    }

    #[test]
    fn test_multiple_chunks_no_escapes() {
        let unescaped = run_stream_test([br"hello ".as_slice(), br"world".as_slice()]).unwrap();
        assert_eq!(unescaped, "hello world");
    }

    #[test]
    fn test_empty_chunks() {
        let chunks = [
            br"hello".as_slice(),
            br"".as_slice(),
            br" ".as_slice(),
            br"".as_slice(),
            br"world".as_slice(),
        ];
        let unescaped = run_stream_test(chunks).unwrap();
        assert_eq!(unescaped, "hello world");
    }

    #[test]
    fn test_split_before_escape() {
        let unescaped = run_stream_test([br"hello".as_slice(), br"\nworld".as_slice()]).unwrap();
        assert_eq!(unescaped, "hello\nworld");
    }

    #[test]
    fn test_split_during_simple_escape() {
        let unescaped = run_stream_test([br"hello\".as_slice(), br"nworld".as_slice()]).unwrap();
        assert_eq!(unescaped, "hello\nworld");
    }

    #[test]
    fn test_split_during_unicode_escape() {
        let unescaped = run_stream_test([br"price: \u20".as_slice(), br"AC".as_slice()]).unwrap();
        assert_eq!(unescaped, "price: €");
    }

    #[test]
    fn test_split_during_surrogate_pair() {
        let unescaped =
            run_stream_test([br"emoji: \uD83D".as_slice(), br"\uDE00".as_slice()]).unwrap();
        assert_eq!(unescaped, "emoji: 😀");
    }

    #[test]
    fn test_split_between_surrogate_pair_halves() {
        let unescaped =
            run_stream_test([br"emoji: \uD83D\".as_slice(), br"uDE00".as_slice()]).unwrap();
        assert_eq!(unescaped, "emoji: 😀");
    }

    #[test]
    fn test_split_in_second_surrogate() {
        let unescaped =
            run_stream_test([br"emoji: \uD83D\uDE".as_slice(), br"00".as_slice()]).unwrap();
        assert_eq!(unescaped, "emoji: 😀");
    }

    /// Every byte of the input arrives in a chunk of its own.
    #[test]
    fn test_tiny_chunks_across_surrogate_pair() {
        let one_byte_chunks = br"emoji: \uD83D\uDE00".chunks(1);
        let unescaped = run_stream_test(one_byte_chunks).unwrap();
        assert_eq!(unescaped, "emoji: 😀");
    }

    #[test]
    fn test_finish_success() {
        let mut stream = UnescapeStream::new();

        let (boundary, rest) = stream.unescape_next(br"hello");
        assert!(boundary.is_none());
        let tokens: Vec<UnescapedToken> = rest.map(|r| r.unwrap()).collect();
        assert_eq!(tokens, alloc::vec![UnescapedToken::Literal(br"hello")]);

        let (boundary, rest) = stream.unescape_next(br" world");
        assert!(boundary.is_none());
        let tokens: Vec<UnescapedToken> = rest.map(|r| r.unwrap()).collect();
        assert_eq!(tokens, alloc::vec![UnescapedToken::Literal(br" world")]);

        assert!(stream.finish().is_ok());
    }

    #[test]
    fn test_finish_error_on_incomplete_escape() {
        let mut stream = UnescapeStream::new();
        let (_, tokens) = stream.unescape_next(br"hello\");
        drain_ok(tokens);
        let err = stream.finish().unwrap_err();
        assert_eq!(err.kind, UnescapeErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_finish_error_on_incomplete_unicode() {
        let mut stream = UnescapeStream::new();
        let (_, tokens) = stream.unescape_next(br"hello\u12");
        drain_ok(tokens);
        let err = stream.finish().unwrap_err();
        assert_eq!(err.kind, UnescapeErrorKind::UnexpectedEof);
    }

    #[test]
    fn test_error_across_boundary_invalid_escape() {
        let err = run_stream_test([br"oh\".as_slice(), br"z".as_slice()]).unwrap_err();
        let expected = UnescapeErrorKind::InvalidEscape(crate::InvalidEscapeError { found: b'z' });
        assert_eq!(err.kind, expected)
    }

    #[test]
    fn test_error_across_boundary_lone_surrogate() {
        let err = run_stream_test([br"\uD83D".as_slice(), br"abc".as_slice()]).unwrap_err();
        let expected =
            UnescapeErrorKind::LoneSurrogate(crate::LoneSurrogateError { surrogate: 0xD83D });
        assert_eq!(err.kind, expected);
    }

    #[test]
    fn test_error_across_boundary_not_low_surrogate() {
        let err = run_stream_test([br"\uD83D".as_slice(), br"\u0020".as_slice()]).unwrap_err();
        let expected =
            UnescapeErrorKind::LoneSurrogate(crate::LoneSurrogateError { surrogate: 0xD83D });
        assert_eq!(err.kind, expected);
    }

    /// After a boundary error the pending bytes are still there until
    /// `clear` is called; afterwards the stream works on new data.
    #[test]
    fn test_clear_after_error() {
        let mut stream = UnescapeStream::new();
        let (_, tokens) = stream.try_unescape_next("abc\\").unwrap();
        drain_ok(tokens);

        let err = stream.try_unescape_next(br"z").unwrap_err();
        let expected = UnescapeErrorKind::InvalidEscape(crate::InvalidEscapeError { found: b'z' });
        assert_eq!(err.kind, expected);

        assert!(stream.clone().finish().is_err());
        stream.clear();
        assert!(stream.clone().finish().is_ok());

        let mut output = String::new();
        let (boundary, rest) = stream.try_unescape_next(br"good data").unwrap();
        assert!(boundary.is_none());
        for token in rest {
            let Ok(UnescapedToken::Literal(literal)) = token else {
                unreachable!()
            };
            output.push_str(str::from_utf8(literal).unwrap());
        }
        assert_eq!(output, "good data");
    }

    /// An error later in the chunk does not bring back the boundary state
    /// that was already resolved.
    #[test]
    fn test_error_after_successful_boundary() {
        let mut stream = UnescapeStream::new();

        let (_, mut first) = stream.unescape_next(br"\uD83D");
        assert!(first.next().is_none());

        let (boundary, mut second) = stream.unescape_next(br"\uDE00\z");
        assert_eq!(boundary.unwrap().unwrap(), '😀');

        let err = second.next().unwrap().unwrap_err();
        let expected = UnescapeErrorKind::InvalidEscape(crate::InvalidEscapeError { found: b'z' });
        assert_eq!(err.kind, expected);

        assert_eq!(stream.stitch_len, 0);
        assert!(stream.finish().is_ok());
    }

    #[test]
    #[allow(deprecated)]
    fn test_unescape_from_source_src_error() {
        let mut chunks = alloc::vec![Ok(b"hello".as_slice()), Err("read error")].into_iter();
        let result = UnescapeStream::new()
            .unescape_from_fn(|| chunks.next(), |_| -> Result<(), Infallible> { Ok(()) });
        assert!(
            matches!(result, Err(UnescapeFnError::Src("read error"))),
            "Expected a source error"
        );
    }

    #[test]
    #[allow(deprecated)]
    fn test_unescape_from_source_dst_error() {
        let mut chunks = alloc::vec![Result::<_, ()>::Ok("hello")].into_iter();
        let result = UnescapeStream::new().unescape_from_fn(
            || chunks.next(),
            |_| -> Result<(), &str> { Err("write error") },
        );
        assert!(
            matches!(result, Err(UnescapeFnError::Dst("write error"))),
            "Expected a destination error"
        );
    }

    /// Exercises `unescape_stream_into!` with an in-memory iterator, with a
    /// `std::io::Read` source and inside an `async fn`.
    #[test]
    fn macro_stream() {
        fn sync_stream() -> Result<(), std::boxed::Box<dyn std::error::Error>> {
            let mut chunks = std::vec![
                br#"{"message": "Hello, W\"orld! \uD83D"#.as_slice(),
                br#"\uDE00"}"#.as_slice(),
            ]
            .into_iter();
            let mut collected = String::new();

            unescape_stream_into! {
                Read: {
                    chunks.next()
                },
                Write: |token| {
                    match token {
                        UnescapedToken::Unescaped(ch) => collected.push(ch),
                        UnescapedToken::Literal(literal) => {
                            collected.push_str(std::str::from_utf8(literal)?)
                        }
                    }
                },
            };

            assert_eq!(collected, r#"{"message": "Hello, W"orld! 😀"}"#);
            Ok(())
        }

        fn sync_read() -> Result<(), std::boxed::Box<dyn std::error::Error>> {
            use std::io::Read;

            let mut input = &br#"{"message": "Hello, W\"orld! \uD83D\uDE00"}"#[..];
            // Small on purpose, so escapes get split across reads.
            let mut scratch = [0u8; 5];
            let mut collected = String::new();

            unescape_stream_into! {
                Read: {
                    let n = input.read(&mut scratch)?;
                    if n == 0 {
                        None
                    } else {
                        Some(&scratch[..n])
                    }
                },
                Write: |token| {
                    match token {
                        UnescapedToken::Unescaped(ch) => collected.push(ch),
                        UnescapedToken::Literal(literal) => {
                            collected.push_str(std::str::from_utf8(literal)?)
                        }
                    }
                },
            };

            assert_eq!(collected, r#"{"message": "Hello, W"orld! 😀"}"#);
            Ok(())
        }

        async fn async_stream() -> Result<(), std::boxed::Box<dyn std::error::Error>> {
            /// Chunk producer with an `async` interface.
            struct AsyncIter<'a> {
                iter: <Vec<&'a [u8]> as IntoIterator>::IntoIter,
            }
            impl<'a> AsyncIter<'a> {
                async fn next(&mut self) -> Option<&'a [u8]> {
                    self.iter.next()
                }
            }

            /// Token consumer with an `async` interface.
            struct AsyncWrite {
                write: String,
            }
            impl AsyncWrite {
                async fn write(
                    &mut self,
                    token: UnescapedToken<'_>,
                ) -> Result<(), std::boxed::Box<dyn std::error::Error>> {
                    match token {
                        UnescapedToken::Unescaped(ch) => self.write.push(ch),
                        UnescapedToken::Literal(literal) => {
                            self.write.push_str(std::str::from_utf8(literal)?)
                        }
                    }
                    Ok(())
                }
            }

            let mut source = AsyncIter {
                iter: std::vec![
                    br#"{"message": "Hello, W\"orld! \uD83D"#.as_slice(),
                    br#"\uDE00"}"#.as_slice(),
                ]
                .into_iter(),
            };
            let mut sink = AsyncWrite {
                write: String::new(),
            };

            unescape_stream_into! {
                Read: {
                    source.next().await
                },
                Write: |token| {
                    sink.write(token).await?
                },
            };

            assert_eq!(sink.write, r#"{"message": "Hello, W"orld! 😀"}"#);
            Ok(())
        }

        sync_stream().unwrap();
        sync_read().unwrap();

        // Nothing in `async_stream` ever suspends, so a single poll with a
        // no-op waker has to complete it.
        use std::future::Future;
        let fut = std::pin::pin!(async_stream());
        let mut cx = std::task::Context::from_waker(std::task::Waker::noop());
        let outcome = fut.poll(&mut cx);
        assert!(matches!(outcome, std::task::Poll::Ready(Ok(()))))
    }
}