json_escape/explicit.rs
1//! More explicit and fine-grained iterators for JSON escaping and unescaping.
2//!
3//! This module provides an alternative API to the one in the crate root. While the
4//! root API yields slices (`&str` or `&[u8]`) that represent the final output,
5//! this module's iterators yield "chunk" structs. These structs distinguish between
6//! parts of the input that were processed literally and the specific characters
7//! that were escaped or unescaped.
8//!
9//! This approach offers several advantages:
10//! - **Greater Control**: You can inspect each component of the transformation,
11//! which can be useful for debugging, logging, or more complex data processing.
12//! - **Potential Performance**: By avoiding the need to look up single-byte escape
13//! sequences in a table on every iteration, some workflows may see a minor
14//! performance improvement.
15//! - **Clarity**: The structure of the output more closely reflects the transformation
16//! process, which can make the logic easier to follow.
17//!
18//! # Example: Escaping
19//!
20//! ```
21//! use json_escape::explicit::escape_str;
22//!
23//! let mut escaper = escape_str("a\nb");
24//!
25//! // The first chunk contains the literal "a" and the escaped newline.
26//! let chunk1 = escaper.next().unwrap();
27//! assert_eq!("a", chunk1.literal());
28//! assert_eq!(Some(r#"\n"#), chunk1.escaped());
29//!
30//! // The second chunk contains the literal "b" and no escaped sequence.
31//! let chunk2 = escaper.next().unwrap();
32//! assert_eq!("b", chunk2.literal());
33//! assert_eq!(None, chunk2.escaped());
34//!
35//! // The iterator is now exhausted.
36//! assert!(escaper.next().is_none());
37//! ```
38//!
39//! # Example: Unescaping
40//!
41//! ```
42//! use json_escape::explicit::unescape;
43//!
44//! let mut unescaper = unescape(br"hello\tworld");
45//!
46//! // The first chunk contains the literal "hello" and the unescaped tab.
47//! let chunk1 = unescaper.next().unwrap().unwrap();
48//! assert_eq!(b"hello", chunk1.literal());
49//! assert_eq!(Some('\t'), chunk1.unescaped());
50//!
51//! // The second chunk contains the literal "world" and no unescaped character.
52//! let chunk2 = unescaper.next().unwrap().unwrap();
53//! assert_eq!(b"world", chunk2.literal());
54//! assert_eq!(None, chunk2.unescaped());
55//!
56//! // The iterator is now exhausted.
57//! assert!(unescaper.next().is_none());
58//! ```
59//!
//! Both iterators integrate with Rust's formatting system without allocating:
//! `Escape` implements `fmt::Display` directly, while `Unescape` offers the
//! `display_utf8` and `display_utf8_lossy` helpers.
62
63#[cfg(feature = "alloc")]
64use crate::DecodeUtf8Error;
65use crate::{ESCAPE_TABLE, UnescapeError, display_bytes_utf8};
66use crate::{InvalidEscapeError, UnescapeErrorKind, find_escape_char};
67use core::fmt;
68use core::iter::FusedIterator;
69use core::str;
70
71#[cfg(feature = "alloc")]
72use alloc::{borrow::Cow, string::String, vec::Vec};
73
74//==============================================================================
75// Escaping
76//==============================================================================
77
78/// Creates an iterator that yields chunks of an escaped JSON string.
79///
80/// See the [module-level documentation](self) for more details.
81#[inline]
82pub fn escape_str(s: &str) -> Escape<'_> {
83 Escape {
84 bytes: s.as_bytes(),
85 }
86}
87
/// One step of JSON escaping: a literal run plus the escape sequence that follows it.
///
/// Values of this type are the items of the [`Escape`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EscapedChunk<'a> {
    /// Borrowed slice of the input that is emitted unchanged.
    literal: &'a str,
    /// Escape sequence (e.g., `r#"\n"#`, `r#"\""#`) emitted right after `literal`.
    /// `None` when the chunk ends without a trailing escape.
    escaped: Option<&'static str>,
}

impl<'a> EscapedChunk<'a> {
    /// The part of the original string covered by this chunk that needed no escaping.
    #[inline]
    pub const fn literal(&self) -> &'a str {
        self.literal
    }

    /// The escape sequence that follows the literal part, or `None` if there is none.
    #[inline]
    pub const fn escaped(&self) -> Option<&'static str> {
        self.escaped
    }
}

impl fmt::Display for EscapedChunk<'_> {
    /// Writes the literal part followed by the escape sequence (when present).
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.literal)?;
        match self.escaped {
            Some(sequence) => f.write_str(sequence),
            None => Ok(()),
        }
    }
}
124
125/// An iterator over a string that yields [`EscapedChunk`]s.
126///
127/// Created by the [`escape_str`] function.
128#[derive(Clone)]
129#[must_use = "iterators are lazy and do nothing unless consumed"]
130pub struct Escape<'a> {
131 bytes: &'a [u8],
132}
133
134impl<'a> Iterator for Escape<'a> {
135 type Item = EscapedChunk<'a>;
136
137 #[inline]
138 fn next(&mut self) -> Option<Self::Item> {
139 if self.bytes.is_empty() {
140 return None;
141 }
142
143 let pos = find_escape_char(self.bytes).unwrap_or(self.bytes.len());
144 let (literal_bytes, rest) = self.bytes.split_at(pos);
145
146 // SAFETY: `find_escape_char` guarantees `pos` is on a UTF-8 boundary.
147 let literal = unsafe { str::from_utf8_unchecked(literal_bytes) };
148
149 if rest.is_empty() {
150 self.bytes = &[];
151 Some(EscapedChunk {
152 literal,
153 escaped: None,
154 })
155 } else {
156 let escaped_char_byte = rest[0];
157 self.bytes = &rest[1..];
158 Some(EscapedChunk {
159 literal,
160 escaped: Some(
161 ESCAPE_TABLE[escaped_char_byte as usize]
162 .expect("find_escape_char found a byte not in ESCAPE_TABLE"),
163 ),
164 })
165 }
166 }
167
168 fn size_hint(&self) -> (usize, Option<usize>) {
169 if self.bytes.is_empty() {
170 (0, Some(0))
171 } else {
172 // We'll yield at least 1 chunk, and at most `len` chunks if every byte is escaped.
173 (1, Some(self.bytes.len()))
174 }
175 }
176}
177
178impl<'a> FusedIterator for Escape<'a> {}
179
180impl<'a> fmt::Display for Escape<'a> {
181 /// This allows the escaped output to be written directly to a formatter
182 /// without intermediate allocation.
183 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184 for chunk in self.clone() {
185 write!(f, "{chunk}")?;
186 }
187 Ok(())
188 }
189}
190
191impl fmt::Debug for Escape<'_> {
192 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
193 f.debug_struct("Escape").finish_non_exhaustive()
194 }
195}
196
197impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Escape<'_> {
198 /// Compares the escaped output with any byte-slice-like object.
199 ///
200 /// This is a convenience for testing, allowing you to check the fully
201 /// concatenated result of an `Escape` iterator against a known `&str` or `&[u8]`.
202 fn eq(&self, other: &B) -> bool {
203 let mut other = other.as_ref();
204 for chunk in self.clone() {
205 // Check literal part
206 if !other.starts_with(chunk.literal.as_bytes()) {
207 return false;
208 }
209 other = &other[chunk.literal.len()..];
210
211 // Check escaped part
212 if let Some(escaped_str) = chunk.escaped {
213 if !other.starts_with(escaped_str.as_bytes()) {
214 return false;
215 }
216 other = &other[escaped_str.len()..];
217 }
218 }
219 other.is_empty()
220 }
221}
222
#[cfg(feature = "alloc")]
impl<'a> From<Escape<'a>> for Cow<'a, str> {
    /// Collects the escaped output into a `Cow<'a, str>`, allocating only when needed.
    ///
    /// - Empty input yields `Cow::Borrowed("")`.
    /// - If the first chunk carries no escape sequence it is also the only chunk,
    ///   so the original slice is returned as `Cow::Borrowed`.
    /// - Otherwise a `String` is allocated and every chunk is appended to it,
    ///   producing `Cow::Owned`.
    fn from(mut iter: Escape<'a>) -> Self {
        match iter.next() {
            None => Cow::Borrowed(""),
            // A chunk without an escape consumed the whole input: nothing to rewrite.
            Some(EscapedChunk {
                literal,
                escaped: None,
            }) => Cow::Borrowed(literal),
            Some(EscapedChunk {
                literal,
                escaped: Some(first_escape),
            }) => {
                // Reserve for the chunk already taken plus the remaining input;
                // the extra 16 bytes absorb a few more escape expansions.
                let capacity = literal.len() + first_escape.len() + iter.bytes.len() + 16;
                let mut out = String::with_capacity(capacity);
                out.push_str(literal);
                out.push_str(first_escape);

                for chunk in iter {
                    out.push_str(chunk.literal);
                    if let Some(sequence) = chunk.escaped {
                        out.push_str(sequence);
                    }
                }
                Cow::Owned(out)
            }
        }
    }
}
255
256//==============================================================================
257// Unescaping
258//==============================================================================
259
260/// Creates an iterator that yields chunks of an unescaped JSON string.
261///
262/// See the [module-level documentation](self) for more details.
263#[inline]
264pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
265 Unescape {
266 bytes: input.as_ref(),
267 }
268}
269
270/// Creates a streaming JSON string unescaper that handles enclosing quotes.
271///
272/// This function is a convenience wrapper around [`unescape`]. If the input byte
273/// slice starts and ends with a double-quote (`"`), the quotes are trimmed
274/// before the content is unescaped.
275///
276/// If the input is not enclosed in quotes, this function behaves identically to
277/// [`unescape`].
278///
279/// # Examples
280///
281/// ```
282/// use json_escape::explicit::unescape_quoted;
283///
284/// // An input string with quotes and an escaped tab.
285/// let bytes = br#""\tline""#;
286/// let mut unescaper = unescape_quoted(bytes);
287///
288/// // The first chunk is the unescaped tab character.
289/// let chunk1 = unescaper.next().unwrap().unwrap();
290/// assert_eq!(b"", chunk1.literal());
291/// assert_eq!(Some('\t'), chunk1.unescaped());
292///
293/// // The second chunk is the literal "line".
294/// let chunk2 = unescaper.next().unwrap().unwrap();
295/// assert_eq!(b"line", chunk2.literal());
296/// assert_eq!(None, chunk2.unescaped());
297///
298/// // The iterator is now exhausted.
299/// assert!(unescaper.next().is_none());
300/// ```
301#[inline]
302pub fn unescape_quoted(bytes: &[u8]) -> Unescape<'_> {
303 let inner = if bytes.len() >= 2 && bytes.first() == Some(&b'"') && bytes.last() == Some(&b'"') {
304 &bytes[1..bytes.len() - 1]
305 } else {
306 bytes
307 };
308 unescape(inner)
309}
310
/// One step of JSON unescaping: a literal byte run plus the character decoded after it.
///
/// Values of this type are the successful items of the [`Unescape`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnescapedChunk<'a> {
    /// Borrowed slice of the input that is passed through unchanged.
    literal: &'a [u8],
    /// The single character produced by the escape sequence following `literal`.
    /// `None` when the chunk ends without a trailing escape.
    unescaped: Option<char>,
}

impl<'a> UnescapedChunk<'a> {
    /// The part of the original bytes covered by this chunk that needed no unescaping.
    #[inline]
    pub const fn literal(&self) -> &'a [u8] {
        self.literal
    }

    /// The character decoded from the escape sequence, or `None` if there is none.
    #[inline]
    pub const fn unescaped(&self) -> Option<char> {
        self.unescaped
    }

    /// Returns a wrapper whose `Display` impl formats the chunk as strict UTF-8.
    ///
    /// If the literal part contains invalid UTF-8 sequences, formatting
    /// results in a `fmt::Error`.
    pub fn display_utf8(&self) -> DisplayUnescapedChunk<'_> {
        DisplayUnescapedChunk {
            chunk: self,
            lossy: false,
        }
    }

    /// Returns a wrapper whose `Display` impl formats the chunk as lossy UTF-8.
    ///
    /// Invalid UTF-8 sequences in the literal part are replaced with the
    /// U+FFFD replacement character.
    pub fn display_utf8_lossy(&self) -> DisplayUnescapedChunk<'_> {
        DisplayUnescapedChunk {
            chunk: self,
            lossy: true,
        }
    }
}

/// Helper struct for safely displaying an [`UnescapedChunk`].
///
/// Created by [`UnescapedChunk::display_utf8`] and
/// [`UnescapedChunk::display_utf8_lossy`].
#[derive(Debug, Clone, Copy)]
pub struct DisplayUnescapedChunk<'a> {
    /// The chunk being formatted.
    chunk: &'a UnescapedChunk<'a>,
    /// `true` to replace invalid UTF-8, `false` to fail on it.
    lossy: bool,
}
364
365impl<'a> fmt::Display for DisplayUnescapedChunk<'a> {
366 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
367 display_bytes_utf8(self.chunk.literal, f, self.lossy)?;
368 if let Some(c) = self.chunk.unescaped {
369 use fmt::Write as _;
370
371 f.write_char(c)?;
372 }
373 Ok(())
374 }
375}
376
377/// An iterator over a byte slice that yields [`UnescapedChunk`]s.
378///
379/// Created by the [`unescape`] function.
380#[derive(Clone)]
381#[must_use = "iterators are lazy and do nothing unless consumed"]
382pub struct Unescape<'a> {
383 bytes: &'a [u8],
384}
385
386impl<'a> Iterator for Unescape<'a> {
387 type Item = Result<UnescapedChunk<'a>, UnescapeError>;
388
389 #[inline]
390 fn next(&mut self) -> Option<Self::Item> {
391 use memchr::memchr;
392
393 if self.bytes.is_empty() {
394 return None;
395 }
396
397 let pos = match memchr(b'\\', self.bytes) {
398 Some(p) => p,
399 None => {
400 // No more backslashes, yield the rest as a final literal chunk.
401 let chunk = UnescapedChunk {
402 literal: self.bytes,
403 unescaped: None,
404 };
405 self.bytes = &[];
406 return Some(Ok(chunk));
407 }
408 };
409
410 let (literal, rest) = self.bytes.split_at(pos);
411 // rest starts with '\\'
412 let mut remainder = &rest[1..];
413
414 let unescaped_char = match remainder.first() {
415 Some(b'u') => {
416 // Temporarily advance past 'u'
417 remainder = &remainder[1..];
418 // Use a helper from the main unescaper, giving it a mutable slice reference
419 // that it can advance.
420 match crate::Unescape::handle_unicode_escape_from_slice(&mut remainder) {
421 Ok(c) => c,
422 Err(e) => {
423 // FIX: handle_unicode_escape_from_slice already handles this for us.
424 // Adjust offset: error is relative to `\u`, but we need it relative to chunk start.
425 return Some(Err(e));
426 }
427 }
428 }
429 Some(&byte) => {
430 remainder = &remainder[1..];
431 match UNESCAPE_TABLE[byte as usize] {
432 Some(c) => c,
433 None => {
434 return Some(Err(UnescapeError {
435 kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError {
436 found: byte,
437 }),
438 // The invalid character is 1 byte after '\'.
439 offset: 1,
440 }));
441 }
442 }
443 }
444 None => {
445 return Some(Err(UnescapeError {
446 kind: UnescapeErrorKind::UnexpectedEof,
447 // EOF occurred 1 byte after '\'.
448 offset: 1,
449 }));
450 }
451 };
452
453 self.bytes = remainder;
454 Some(Ok(UnescapedChunk {
455 literal,
456 unescaped: Some(unescaped_char),
457 }))
458 }
459}
460
461impl<'a> FusedIterator for Unescape<'a> {}
462
463impl<'a> Unescape<'a> {
464 /// Decodes the unescaped byte stream into a UTF-8 string.
465 ///
466 /// This method consumes the iterator and collects all resulting byte chunks
467 /// into a `Cow<[u8]>`, which is then validated as UTF-8. If an unescaping
468 /// error occurs, it's returned immediately. If the final sequence of bytes
469 /// is not valid UTF-8, a UTF-8 error is returned.
470 ///
471 /// This is optimized to return a `Cow::Borrowed` if no escapes were present
472 /// in the input, avoiding allocation.
473 ///
474 /// **Requires the `alloc` feature.**
475 ///
476 /// # Example
477 ///
478 /// ```
479 /// # #[cfg(feature = "alloc")] {
480 /// use json_escape::explicit::unescape;
481 ///
482 /// let input = r#"Emoji: \uD83D\uDE00"#;
483 /// let cow = unescape(input).decode_utf8().unwrap();
484 ///
485 /// assert_eq!(cow, "Emoji: 😀");
486 /// # }
487 /// ```
488 #[cfg(feature = "alloc")]
489 pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
490 match self.try_into().map_err(DecodeUtf8Error::Unescape)? {
491 Cow::Borrowed(bytes) => str::from_utf8(bytes)
492 .map(Cow::Borrowed)
493 .map_err(DecodeUtf8Error::Utf8),
494 Cow::Owned(bytes) => String::from_utf8(bytes)
495 .map(Cow::Owned)
496 .map_err(|e| DecodeUtf8Error::Utf8(e.utf8_error())),
497 }
498 }
499
500 /// Decodes the unescaped byte stream lossily into a UTF-8 string.
501 ///
502 /// This is similar to [`Unescape::decode_utf8`] but replaces any invalid UTF-8 sequences
503 /// with the replacement character (`U+FFFD`) instead of returning an error.
504 ///
505 /// An `UnescapeError` can still be returned if the JSON escaping itself is invalid.
506 ///
507 /// **Requires the `alloc` feature.**
508 #[cfg(feature = "alloc")]
509 pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
510 use crate::decode_utf8_lossy;
511
512 Ok(decode_utf8_lossy(self.try_into()?))
513 }
514
515 /// Returns a wrapper that implements [`fmt::Display`].
516 ///
517 /// If an unescaping error or invalid UTF-8 sequence is encountered,
518 /// a `fmt::Error` is returned, which will cause `format!` and friends to panic.
519 pub fn display_utf8(self) -> DisplayUnescape<'a> {
520 DisplayUnescape {
521 inner: self,
522 lossy: false,
523 }
524 }
525
526 /// Returns a wrapper that implements [`fmt::Display` for lossy UTF-8 decoding.
527 ///
528 /// Invalid UTF-8 sequences will be replaced with the replacement character.
529 /// An unescaping error will still result in a `fmt::Error`.
530 pub fn display_utf8_lossy(self) -> DisplayUnescape<'a> {
531 DisplayUnescape {
532 inner: self,
533 lossy: true,
534 }
535 }
536}
537
538impl fmt::Debug for Unescape<'_> {
539 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
540 f.debug_struct("Unescape").finish_non_exhaustive()
541 }
542}
543
544impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Unescape<'_> {
545 /// Compares the unescaped output with a byte-slice-like object.
546 ///
547 /// Returns `true` if the iterator successfully unescapes to produce a byte
548 /// sequence identical to `other`. If an error occurs, returns `false`.
549 fn eq(&self, other: &B) -> bool {
550 let mut other = other.as_ref();
551 let mut char_buf = [0u8; 4];
552
553 for result in self.clone() {
554 match result {
555 Ok(chunk) => {
556 // Check literal part
557 if !other.starts_with(chunk.literal) {
558 return false;
559 }
560 other = &other[chunk.literal.len()..];
561
562 // Check unescaped part
563 if let Some(c) = chunk.unescaped {
564 let char_bytes = c.encode_utf8(&mut char_buf);
565 if !other.starts_with(char_bytes.as_bytes()) {
566 return false;
567 }
568 other = &other[char_bytes.len()..];
569 }
570 }
571 Err(_) => return false, // An erroring iterator cannot be equal.
572 }
573 }
574 other.is_empty()
575 }
576}
577
578impl<B: AsRef<[u8]>> PartialEq<Unescape<'_>> for Result<B, UnescapeError> {
579 /// Compares the unescaper's outcome with a `Result`.
580 ///
581 /// This allows for precise testing of `Unescape` against either a
582 /// successful outcome (`Ok(bytes)`) or a specific failure (`Err(error)`).
583 fn eq(&self, unescape: &Unescape<'_>) -> bool {
584 match self {
585 Ok(expected_bytes) => unescape == expected_bytes,
586 Err(expected_error) => {
587 for result in unescape.clone() {
588 if let Err(actual_error) = result {
589 // The iterator's first error is its final outcome.
590 return actual_error == *expected_error;
591 }
592 }
593 // `unescape` completed successfully, but an error was expected.
594 false
595 }
596 }
597 }
598}
599
#[cfg(feature = "alloc")]
impl<'a> TryFrom<Unescape<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Collects the unescaped bytes into a `Cow<'a, [u8]>`, allocating only when needed.
    ///
    /// - Empty input yields an empty `Cow::Borrowed`.
    /// - If the first chunk carries no unescaped character it is also the only
    ///   chunk, so the original slice is returned as `Cow::Borrowed`.
    /// - Otherwise the chunks are gathered into a `Vec<u8>` and returned as
    ///   `Cow::Owned`.
    ///
    /// # Errors
    ///
    /// Returns the first `UnescapeError` produced by the iterator.
    fn try_from(mut value: Unescape<'a>) -> Result<Self, Self::Error> {
        let (literal, first_char) = match value.next() {
            None => return Ok(Cow::Borrowed(b"")),
            Some(Err(error)) => return Err(error),
            // A chunk without an unescaped character consumed the whole input.
            Some(Ok(UnescapedChunk {
                literal,
                unescaped: None,
            })) => return Ok(Cow::Borrowed(literal)),
            Some(Ok(UnescapedChunk {
                literal,
                unescaped: Some(c),
            })) => (literal, c),
        };

        // Reserve for the chunk already taken plus the remaining input; the
        // extra 16 bytes of slack are kept from the original sizing.
        let capacity = literal.len() + first_char.len_utf8() + value.bytes.len() + 16;
        let mut buf = Vec::with_capacity(capacity);

        // Scratch space for encoding one `char` as UTF-8.
        let mut utf8 = [0u8; 4];
        buf.extend_from_slice(literal);
        buf.extend_from_slice(first_char.encode_utf8(&mut utf8).as_bytes());

        for item in value {
            let chunk = item?;
            buf.extend_from_slice(chunk.literal);
            if let Some(c) = chunk.unescaped {
                buf.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
            }
        }
        Ok(Cow::Owned(buf))
    }
}
649
650/// A wrapper struct for implementing `fmt::Display` on an [`Unescape`] iterator.
651pub struct DisplayUnescape<'a> {
652 inner: Unescape<'a>,
653 lossy: bool,
654}
655
656impl<'a> fmt::Display for DisplayUnescape<'a> {
657 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
658 for chunk_result in self.inner.clone() {
659 match chunk_result {
660 Ok(chunk) => {
661 let display_chunk = DisplayUnescapedChunk {
662 chunk: &chunk,
663 lossy: self.lossy,
664 };
665 write!(f, "{}", display_chunk)?;
666 }
667 Err(_) => return Err(fmt::Error), // Signal error to formatter
668 }
669 }
670 Ok(())
671 }
672}
673
674impl<'a> crate::Unescape<'a> {
675 #[inline(always)]
676 pub(crate) fn handle_unicode_escape_from_slice(
677 bytes: &mut &'a [u8],
678 ) -> Result<char, UnescapeError> {
679 let mut iter = bytes.iter();
680 let result = crate::Unescape::handle_unicode_escape(&mut iter);
681 if result.is_ok() {
682 *bytes = iter.as_slice();
683 }
684 result
685 }
686}
687
// Unescape table: maps the byte that follows '\' to the character it stands for.
// Bytes that do not form a single-character escape map to `None`
// (`u` is handled separately by the iterator).
const UNESCAPE_TABLE: [Option<char>; 256] = {
    const PAIRS: [(u8, char); 8] = [
        (b'"', '\"'),
        (b'\\', '\\'),
        (b'/', '/'),
        (b'b', '\x08'),
        (b'f', '\x0C'),
        (b'n', '\n'),
        (b'r', '\r'),
        (b't', '\t'),
    ];

    let mut table: [Option<char>; 256] = [None; 256];
    // `for` loops are not available in const context, so index manually.
    let mut i = 0;
    while i < PAIRS.len() {
        let (byte, ch) = PAIRS[i];
        table[byte as usize] = Some(ch);
        i += 1;
    }
    table
};
701
702//==============================================================================
703// Iterator Trait Implementations
704//==============================================================================
705
#[cfg(feature = "alloc")]
mod iter_traits {
    use super::{EscapedChunk, UnescapedChunk};
    use alloc::string::String;
    use alloc::vec::Vec;

    /// Builds a `String` by concatenating the output of every escaped chunk.
    impl<'a> FromIterator<EscapedChunk<'a>> for String {
        #[inline]
        fn from_iter<I: IntoIterator<Item = EscapedChunk<'a>>>(iter: I) -> String {
            let mut out = String::new();
            out.extend(iter);
            out
        }
    }

    /// Appends each chunk's literal text, then its escape sequence (if any).
    impl<'a> Extend<EscapedChunk<'a>> for String {
        #[inline]
        fn extend<I: IntoIterator<Item = EscapedChunk<'a>>>(&mut self, iter: I) {
            iter.into_iter().for_each(|chunk| {
                self.push_str(chunk.literal);
                if let Some(sequence) = chunk.escaped {
                    self.push_str(sequence);
                }
            });
        }
    }

    /// Builds a byte vector by concatenating the output of every unescaped chunk.
    impl<'a> FromIterator<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn from_iter<I: IntoIterator<Item = UnescapedChunk<'a>>>(iter: I) -> Vec<u8> {
            let mut out = Vec::new();
            out.extend(iter);
            out
        }
    }

    /// Appends each chunk's literal bytes, then the UTF-8 encoding of its
    /// unescaped character (if any).
    impl<'a> Extend<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn extend<I: IntoIterator<Item = UnescapedChunk<'a>>>(&mut self, iter: I) {
            // Scratch space for encoding one `char` as UTF-8.
            let mut utf8 = [0u8; 4];
            iter.into_iter().for_each(|chunk| {
                self.extend_from_slice(chunk.literal);
                if let Some(c) = chunk.unescaped {
                    self.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
                }
            });
        }
    }
}
761
#[cfg(test)]
mod tests {
    use super::*;

    impl<'a> EscapedChunk<'a> {
        /// Test-only constructor for an `EscapedChunk`.
        const fn new(literal: &'a str, escaped: Option<&'static str>) -> Self {
            Self { literal, escaped }
        }
    }

    impl<'a> UnescapedChunk<'a> {
        /// Test-only constructor for an `UnescapedChunk`.
        const fn new(literal: &'a [u8], unescaped: Option<char>) -> Self {
            Self { literal, unescaped }
        }
    }

    /// The escaper splits its input at every character that needs escaping.
    #[test]
    fn escape_chunks() {
        let expected = [
            EscapedChunk::new("a", Some(r#"\n"#)),
            EscapedChunk::new("b", Some(r#"\""#)),
            EscapedChunk::new("c", None),
        ];

        let mut it = escape_str("a\nb\"c");
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(it.next(), Some(*want), "Chunk {}", index + 1);
        }
        assert_eq!(it.next(), None, "End of iterator");
    }

    /// The unescaper splits its input at every escape sequence.
    #[test]
    fn unescape_chunks() {
        let expected = [
            UnescapedChunk::new(b"xy", Some('\t')),
            UnescapedChunk::new(b"", Some(' ')),
            UnescapedChunk::new(b"z", None),
        ];

        let mut it = unescape(br"xy\t\u0020z");
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(it.next().unwrap().unwrap(), *want, "Chunk {}", index + 1);
        }
        assert_eq!(it.next(), None, "End of iterator");
    }

    /// Collecting the escaped chunks into a `String` gives the escaped text.
    #[test]
    fn test_escape_against_collected_string() {
        let cases = [
            ("Hello, world!", "Hello, world!"),
            ("a\"b", r#"a\"b"#),
            ("\0", r#"\u0000"#),
            ("path/to/file", r#"path/to/file"#),
        ];
        for &(input, want) in cases.iter() {
            assert_eq!(escape_str(input).collect::<String>(), want);
        }

        // Only checks that iterating over non-ASCII input runs to completion.
        escape_str(r#"Unicode test: éàçüö. Emoji: 😀. More symbols: ❤️✅."#).for_each(|_| {});
    }

    /// `decode_utf8` gives the unescaped text, including surrogate pairs.
    #[test]
    fn test_unescape_against_collected_string() {
        let plain = unescape(br"Hello, world!").decode_utf8().unwrap();
        assert_eq!(plain, "Hello, world!");

        let newline = unescape(br"a\nb").decode_utf8().unwrap();
        assert_eq!(newline, "a\nb");

        let emoji = unescape(br"\uD83D\uDE00").decode_utf8().unwrap();
        assert_eq!(emoji, "😀");
    }

    /// An invalid escape is reported as an error on the step that reaches it.
    #[test]
    fn unescape_error_propagation() {
        let mut it = unescape(br"valid\k");

        // The current implementation bundles the literal with the result of the
        // escape, so the very first item is already the error.
        let first = it.next().unwrap();
        assert!(first.is_err());
    }

    // Inspired by and copied from memchr
    #[test]
    fn sync_regression() {
        use core::panic::{RefUnwindSafe, UnwindSafe};

        fn require_auto_traits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

        require_auto_traits::<Unescape<'_>>();
        require_auto_traits::<Escape<'_>>();

        require_auto_traits::<UnescapedChunk<'_>>();
        require_auto_traits::<EscapedChunk<'_>>();
    }
}
867}