json_escape/
explicit.rs

1//! More explicit and fine-grained iterators for JSON escaping and unescaping.
2//!
3//! This module provides an alternative API to the one in the crate root. While the
4//! root API yields slices (`&str` or `&[u8]`) that represent the final output,
5//! this module's iterators yield "chunk" structs. These structs distinguish between
6//! parts of the input that were processed literally and the specific characters
7//! that were escaped or unescaped.
8//!
9//! This approach offers several advantages:
10//! - **Greater Control**: You can inspect each component of the transformation,
11//!   which can be useful for debugging, logging, or more complex data processing.
12//! - **Potential Performance**: By avoiding the need to look up single-byte escape
13//!   sequences in a table on every iteration, some workflows may see a minor
14//!   performance improvement.
15//! - **Clarity**: The structure of the output more closely reflects the transformation
16//!   process, which can make the logic easier to follow.
17//!
18//! # Example: Escaping
19//!
20//! ```
21//! use json_escape::explicit::escape_str;
22//!
23//! let mut escaper = escape_str("a\nb");
24//!
25//! // The first chunk contains the literal "a" and the escaped newline.
26//! let chunk1 = escaper.next().unwrap();
27//! assert_eq!("a", chunk1.literal());
28//! assert_eq!(Some(r#"\n"#), chunk1.escaped());
29//!
30//! // The second chunk contains the literal "b" and no escaped sequence.
31//! let chunk2 = escaper.next().unwrap();
32//! assert_eq!("b", chunk2.literal());
33//! assert_eq!(None, chunk2.escaped());
34//!
35//! // The iterator is now exhausted.
36//! assert!(escaper.next().is_none());
37//! ```
38//!
39//! # Example: Unescaping
40//!
41//! ```
42//! use json_escape::explicit::unescape;
43//!
44//! let mut unescaper = unescape(br"hello\tworld");
45//!
46//! // The first chunk contains the literal "hello" and the unescaped tab.
47//! let chunk1 = unescaper.next().unwrap().unwrap();
48//! assert_eq!(b"hello", chunk1.literal());
49//! assert_eq!(Some('\t'), chunk1.unescaped());
50//!
51//! // The second chunk contains the literal "world" and no unescaped character.
52//! let chunk2 = unescaper.next().unwrap().unwrap();
53//! assert_eq!(b"world", chunk2.literal());
54//! assert_eq!(None, chunk2.unescaped());
55//!
56//! // The iterator is now exhausted.
57//! assert!(unescaper.next().is_none());
58//! ```
59//!
60//! Both `Escape` and `Unescape` iterators provide `display` helpers for easy integration
61//! with Rust's formatting system, preserving the zero-allocation benefits of the main API.
62
63#[cfg(feature = "alloc")]
64use crate::DecodeUtf8Error;
65use crate::{ESCAPE_TABLE, UnescapeError, display_bytes_utf8};
66use crate::{InvalidEscapeError, UnescapeErrorKind, find_escape_char};
67use core::fmt;
68use core::iter::FusedIterator;
69use core::str;
70
71#[cfg(feature = "alloc")]
72use alloc::{borrow::Cow, string::String, vec::Vec};
73
74//==============================================================================
75// Escaping
76//==============================================================================
77
78/// Creates an iterator that yields chunks of an escaped JSON string.
79///
80/// See the [module-level documentation](self) for more details.
81#[inline]
82pub fn escape_str(s: &str) -> Escape<'_> {
83    Escape {
84        bytes: s.as_bytes(),
85    }
86}
87
/// One step of JSON escaping: a literal run followed by at most one escape sequence.
///
/// Values of this type are produced by the [`Escape`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EscapedChunk<'a> {
    /// Portion of the original input that is emitted unchanged.
    literal: &'a str,
    /// Escape sequence (e.g., `r#"\n"#`, `r#"\""#`) that directly follows `literal`.
    /// `None` when this is the final chunk and nothing after the literal needed escaping.
    escaped: Option<&'static str>,
}

impl<'a> EscapedChunk<'a> {
    /// The unescaped text of this chunk, borrowed from the original string.
    #[inline]
    pub const fn literal(&self) -> &'a str {
        self.literal
    }

    /// The escape sequence that follows the literal, or `None` if there is none.
    #[inline]
    pub const fn escaped(&self) -> Option<&'static str> {
        self.escaped
    }
}

impl<'a> fmt::Display for EscapedChunk<'a> {
    /// Writes the literal text and then the escape sequence, when one is present.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.literal)?;
        match self.escaped {
            Some(sequence) => f.write_str(sequence),
            None => Ok(()),
        }
    }
}
124
125/// An iterator over a string that yields [`EscapedChunk`]s.
126///
127/// Created by the [`escape_str`] function.
128#[derive(Clone)]
129#[must_use = "iterators are lazy and do nothing unless consumed"]
130pub struct Escape<'a> {
131    bytes: &'a [u8],
132}
133
134impl<'a> Iterator for Escape<'a> {
135    type Item = EscapedChunk<'a>;
136
137    #[inline]
138    fn next(&mut self) -> Option<Self::Item> {
139        if self.bytes.is_empty() {
140            return None;
141        }
142
143        let pos = find_escape_char(self.bytes).unwrap_or(self.bytes.len());
144        let (literal_bytes, rest) = self.bytes.split_at(pos);
145
146        // SAFETY: `find_escape_char` guarantees `pos` is on a UTF-8 boundary.
147        let literal = unsafe { str::from_utf8_unchecked(literal_bytes) };
148
149        if rest.is_empty() {
150            self.bytes = &[];
151            Some(EscapedChunk {
152                literal,
153                escaped: None,
154            })
155        } else {
156            let escaped_char_byte = rest[0];
157            self.bytes = &rest[1..];
158            Some(EscapedChunk {
159                literal,
160                escaped: Some(
161                    ESCAPE_TABLE[escaped_char_byte as usize]
162                        .expect("find_escape_char found a byte not in ESCAPE_TABLE"),
163                ),
164            })
165        }
166    }
167
168    fn size_hint(&self) -> (usize, Option<usize>) {
169        if self.bytes.is_empty() {
170            (0, Some(0))
171        } else {
172            // We'll yield at least 1 chunk, and at most `len` chunks if every byte is escaped.
173            (1, Some(self.bytes.len()))
174        }
175    }
176}
177
178impl<'a> FusedIterator for Escape<'a> {}
179
180impl<'a> fmt::Display for Escape<'a> {
181    /// This allows the escaped output to be written directly to a formatter
182    /// without intermediate allocation.
183    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184        for chunk in self.clone() {
185            write!(f, "{chunk}")?;
186        }
187        Ok(())
188    }
189}
190
191impl fmt::Debug for Escape<'_> {
192    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
193        f.debug_struct("Escape").finish_non_exhaustive()
194    }
195}
196
197impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Escape<'_> {
198    /// Compares the escaped output with any byte-slice-like object.
199    ///
200    /// This is a convenience for testing, allowing you to check the fully
201    /// concatenated result of an `Escape` iterator against a known `&str` or `&[u8]`.
202    fn eq(&self, other: &B) -> bool {
203        let mut other = other.as_ref();
204        for chunk in self.clone() {
205            // Check literal part
206            if !other.starts_with(chunk.literal.as_bytes()) {
207                return false;
208            }
209            other = &other[chunk.literal.len()..];
210
211            // Check escaped part
212            if let Some(escaped_str) = chunk.escaped {
213                if !other.starts_with(escaped_str.as_bytes()) {
214                    return false;
215                }
216                other = &other[escaped_str.len()..];
217            }
218        }
219        other.is_empty()
220    }
221}
222
#[cfg(feature = "alloc")]
impl<'a> From<Escape<'a>> for Cow<'a, str> {
    /// Efficiently collects the escaped parts into a `Cow<'a, str>`.
    ///
    /// This implementation is optimized to avoid allocation if possible:
    /// - If the input string requires **no escaping**, it returns `Cow::Borrowed`
    ///   with a slice of the original string.
    /// - If escaping is needed, it allocates a `String` and returns `Cow::Owned`.
    fn from(mut iter: Escape<'a>) -> Self {
        match iter.next() {
            // Empty input.
            None => Cow::Borrowed(""),
            // The first chunk has no escape, so it is also the only chunk and
            // covers the whole input: nothing needed escaping.
            Some(EscapedChunk {
                literal,
                escaped: None,
            }) => Cow::Borrowed(literal),
            // Escaping occurred. We must allocate.
            Some(EscapedChunk {
                literal,
                escaped: Some(first_escape),
            }) => {
                // `iter.bytes` only holds the input *after* the first chunk, so the
                // already-consumed literal and escape are added explicitly. The
                // extra 16 bytes leave some room for later escape sequences.
                let capacity = literal.len() + first_escape.len() + iter.bytes.len() + 16;
                let mut s = String::with_capacity(capacity);
                s.push_str(literal);
                s.push_str(first_escape);
                for chunk in iter {
                    s.push_str(chunk.literal);
                    if let Some(escaped) = chunk.escaped {
                        s.push_str(escaped);
                    }
                }
                Cow::Owned(s)
            }
        }
    }
}
255
256//==============================================================================
257// Unescaping
258//==============================================================================
259
260/// Creates an iterator that yields chunks of an unescaped JSON string.
261///
262/// See the [module-level documentation](self) for more details.
263#[inline]
264pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
265    Unescape {
266        bytes: input.as_ref(),
267    }
268}
269
270/// Creates a streaming JSON string unescaper that handles enclosing quotes.
271///
272/// This function is a convenience wrapper around [`unescape`]. If the input byte
273/// slice starts and ends with a double-quote (`"`), the quotes are trimmed
274/// before the content is unescaped.
275///
276/// If the input is not enclosed in quotes, this function behaves identically to
277/// [`unescape`].
278///
279/// # Examples
280///
281/// ```
282/// use json_escape::explicit::unescape_quoted;
283///
284/// // An input string with quotes and an escaped tab.
285/// let bytes = br#""\tline""#;
286/// let mut unescaper = unescape_quoted(bytes);
287///
288/// // The first chunk is the unescaped tab character.
289/// let chunk1 = unescaper.next().unwrap().unwrap();
290/// assert_eq!(b"", chunk1.literal());
291/// assert_eq!(Some('\t'), chunk1.unescaped());
292///
293/// // The second chunk is the literal "line".
294/// let chunk2 = unescaper.next().unwrap().unwrap();
295/// assert_eq!(b"line", chunk2.literal());
296/// assert_eq!(None, chunk2.unescaped());
297///
298/// // The iterator is now exhausted.
299/// assert!(unescaper.next().is_none());
300/// ```
301#[inline]
302pub fn unescape_quoted(bytes: &[u8]) -> Unescape<'_> {
303    let inner = if bytes.len() >= 2 && bytes.first() == Some(&b'"') && bytes.last() == Some(&b'"') {
304        &bytes[1..bytes.len() - 1]
305    } else {
306        bytes
307    };
308    unescape(inner)
309}
310
311/// A chunk of a JSON-unescaped byte slice, separating the literal part from the unescaped character.
312///
313/// This struct is yielded by the [`Unescape`] iterator.
314#[derive(Debug, Clone, Copy, PartialEq, Eq)]
315pub struct UnescapedChunk<'a> {
316    /// A slice of the original input that did not require unescaping.
317    literal: &'a [u8],
318    /// The single character that was unescaped.
319    /// Is `None` if this is the last chunk and it has no trailing unescaped character.
320    unescaped: Option<char>,
321}
322
323impl<'a> UnescapedChunk<'a> {
324    /// Returns the literal part of the chunk, which is a slice of the original bytes.
325    #[inline]
326    pub const fn literal(&self) -> &'a [u8] {
327        self.literal
328    }
329
330    /// Returns the unescaped character, if any.
331    #[inline]
332    pub const fn unescaped(&self) -> Option<char> {
333        self.unescaped
334    }
335
336    /// Returns a displayable wrapper that will format the chunk as a UTF-8 string.
337    ///
338    /// If the literal part of the chunk contains invalid UTF-8 sequences, this
339    /// will result in a `fmt::Error`.
340    pub fn display_utf8(&self) -> DisplayUnescapedChunk<'_> {
341        DisplayUnescapedChunk {
342            chunk: self,
343            lossy: false,
344        }
345    }
346
347    /// Returns a displayable wrapper that will format the chunk as a lossy UTF-8 string.
348    ///
349    /// Any invalid UTF-8 sequences in the literal part of the chunk will be
350    /// replaced with the U+FFFD replacement character.
351    pub fn display_utf8_lossy(&self) -> DisplayUnescapedChunk<'_> {
352        DisplayUnescapedChunk {
353            chunk: self,
354            lossy: true,
355        }
356    }
357}
358
359/// Helper struct for safely displaying an [`UnescapedChunk`].
360pub struct DisplayUnescapedChunk<'a> {
361    chunk: &'a UnescapedChunk<'a>,
362    lossy: bool,
363}
364
365impl<'a> fmt::Display for DisplayUnescapedChunk<'a> {
366    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
367        display_bytes_utf8(self.chunk.literal, f, self.lossy)?;
368        if let Some(c) = self.chunk.unescaped {
369            use fmt::Write as _;
370
371            f.write_char(c)?;
372        }
373        Ok(())
374    }
375}
376
377/// An iterator over a byte slice that yields [`UnescapedChunk`]s.
378///
379/// Created by the [`unescape`] function.
380#[derive(Clone)]
381#[must_use = "iterators are lazy and do nothing unless consumed"]
382pub struct Unescape<'a> {
383    bytes: &'a [u8],
384}
385
386impl<'a> Iterator for Unescape<'a> {
387    type Item = Result<UnescapedChunk<'a>, UnescapeError>;
388
389    #[inline]
390    fn next(&mut self) -> Option<Self::Item> {
391        use memchr::memchr;
392
393        if self.bytes.is_empty() {
394            return None;
395        }
396
397        let pos = match memchr(b'\\', self.bytes) {
398            Some(p) => p,
399            None => {
400                // No more backslashes, yield the rest as a final literal chunk.
401                let chunk = UnescapedChunk {
402                    literal: self.bytes,
403                    unescaped: None,
404                };
405                self.bytes = &[];
406                return Some(Ok(chunk));
407            }
408        };
409
410        let (literal, rest) = self.bytes.split_at(pos);
411        // rest starts with '\\'
412        let mut remainder = &rest[1..];
413
414        let unescaped_char = match remainder.first() {
415            Some(b'u') => {
416                // Temporarily advance past 'u'
417                remainder = &remainder[1..];
418                // Use a helper from the main unescaper, giving it a mutable slice reference
419                // that it can advance.
420                match crate::Unescape::handle_unicode_escape_from_slice(&mut remainder) {
421                    Ok(c) => c,
422                    Err(e) => {
423                        // FIX: handle_unicode_escape_from_slice already handles this for us.
424                        // Adjust offset: error is relative to `\u`, but we need it relative to chunk start.
425                        return Some(Err(e));
426                    }
427                }
428            }
429            Some(&byte) => {
430                remainder = &remainder[1..];
431                match UNESCAPE_TABLE[byte as usize] {
432                    Some(c) => c,
433                    None => {
434                        return Some(Err(UnescapeError {
435                            kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError {
436                                found: byte,
437                            }),
438                            // The invalid character is 1 byte after '\'.
439                            offset: 1,
440                        }));
441                    }
442                }
443            }
444            None => {
445                return Some(Err(UnescapeError {
446                    kind: UnescapeErrorKind::UnexpectedEof,
447                    // EOF occurred 1 byte after '\'.
448                    offset: 1,
449                }));
450            }
451        };
452
453        self.bytes = remainder;
454        Some(Ok(UnescapedChunk {
455            literal,
456            unescaped: Some(unescaped_char),
457        }))
458    }
459}
460
461impl<'a> FusedIterator for Unescape<'a> {}
462
463impl<'a> Unescape<'a> {
464    /// Decodes the unescaped byte stream into a UTF-8 string.
465    ///
466    /// This method consumes the iterator and collects all resulting byte chunks
467    /// into a `Cow<[u8]>`, which is then validated as UTF-8. If an unescaping
468    /// error occurs, it's returned immediately. If the final sequence of bytes
469    /// is not valid UTF-8, a UTF-8 error is returned.
470    ///
471    /// This is optimized to return a `Cow::Borrowed` if no escapes were present
472    /// in the input, avoiding allocation.
473    ///
474    /// **Requires the `alloc` feature.**
475    ///
476    /// # Example
477    ///
478    /// ```
479    /// # #[cfg(feature = "alloc")] {
480    /// use json_escape::explicit::unescape;
481    ///
482    /// let input = r#"Emoji: \uD83D\uDE00"#;
483    /// let cow = unescape(input).decode_utf8().unwrap();
484    ///
485    /// assert_eq!(cow, "Emoji: 😀");
486    /// # }
487    /// ```
488    #[cfg(feature = "alloc")]
489    pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
490        match self.try_into().map_err(DecodeUtf8Error::Unescape)? {
491            Cow::Borrowed(bytes) => str::from_utf8(bytes)
492                .map(Cow::Borrowed)
493                .map_err(DecodeUtf8Error::Utf8),
494            Cow::Owned(bytes) => String::from_utf8(bytes)
495                .map(Cow::Owned)
496                .map_err(|e| DecodeUtf8Error::Utf8(e.utf8_error())),
497        }
498    }
499
500    /// Decodes the unescaped byte stream lossily into a UTF-8 string.
501    ///
502    /// This is similar to [`Unescape::decode_utf8`] but replaces any invalid UTF-8 sequences
503    /// with the replacement character (`U+FFFD`) instead of returning an error.
504    ///
505    /// An `UnescapeError` can still be returned if the JSON escaping itself is invalid.
506    ///
507    /// **Requires the `alloc` feature.**
508    #[cfg(feature = "alloc")]
509    pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
510        use crate::decode_utf8_lossy;
511
512        Ok(decode_utf8_lossy(self.try_into()?))
513    }
514
515    /// Returns a wrapper that implements [`fmt::Display`].
516    ///
517    /// If an unescaping error or invalid UTF-8 sequence is encountered,
518    /// a `fmt::Error` is returned, which will cause `format!` and friends to panic.
519    pub fn display_utf8(self) -> DisplayUnescape<'a> {
520        DisplayUnescape {
521            inner: self,
522            lossy: false,
523        }
524    }
525
526    /// Returns a wrapper that implements [`fmt::Display` for lossy UTF-8 decoding.
527    ///
528    /// Invalid UTF-8 sequences will be replaced with the replacement character.
529    /// An unescaping error will still result in a `fmt::Error`.
530    pub fn display_utf8_lossy(self) -> DisplayUnescape<'a> {
531        DisplayUnescape {
532            inner: self,
533            lossy: true,
534        }
535    }
536}
537
538impl fmt::Debug for Unescape<'_> {
539    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
540        f.debug_struct("Unescape").finish_non_exhaustive()
541    }
542}
543
544impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Unescape<'_> {
545    /// Compares the unescaped output with a byte-slice-like object.
546    ///
547    /// Returns `true` if the iterator successfully unescapes to produce a byte
548    /// sequence identical to `other`. If an error occurs, returns `false`.
549    fn eq(&self, other: &B) -> bool {
550        let mut other = other.as_ref();
551        let mut char_buf = [0u8; 4];
552
553        for result in self.clone() {
554            match result {
555                Ok(chunk) => {
556                    // Check literal part
557                    if !other.starts_with(chunk.literal) {
558                        return false;
559                    }
560                    other = &other[chunk.literal.len()..];
561
562                    // Check unescaped part
563                    if let Some(c) = chunk.unescaped {
564                        let char_bytes = c.encode_utf8(&mut char_buf);
565                        if !other.starts_with(char_bytes.as_bytes()) {
566                            return false;
567                        }
568                        other = &other[char_bytes.len()..];
569                    }
570                }
571                Err(_) => return false, // An erroring iterator cannot be equal.
572            }
573        }
574        other.is_empty()
575    }
576}
577
578impl<B: AsRef<[u8]>> PartialEq<Unescape<'_>> for Result<B, UnescapeError> {
579    /// Compares the unescaper's outcome with a `Result`.
580    ///
581    /// This allows for precise testing of `Unescape` against either a
582    /// successful outcome (`Ok(bytes)`) or a specific failure (`Err(error)`).
583    fn eq(&self, unescape: &Unescape<'_>) -> bool {
584        match self {
585            Ok(expected_bytes) => unescape == expected_bytes,
586            Err(expected_error) => {
587                for result in unescape.clone() {
588                    if let Err(actual_error) = result {
589                        // The iterator's first error is its final outcome.
590                        return actual_error == *expected_error;
591                    }
592                }
593                // `unescape` completed successfully, but an error was expected.
594                false
595            }
596        }
597    }
598}
599
#[cfg(feature = "alloc")]
impl<'a> TryFrom<Unescape<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Efficiently collects the unescaped bytes into a `Cow<'a, [u8]>`.
    ///
    /// Returns `Cow::Borrowed` if no escape sequences were present, avoiding
    /// allocation. Otherwise, returns `Cow::Owned`. If an error occurs, it's
    /// returned immediately.
    fn try_from(mut value: Unescape<'a>) -> Result<Self, Self::Error> {
        // Appends the UTF-8 encoding of `c` by growing the buffer and encoding
        // directly into the new tail.
        fn append_char(buf: &mut Vec<u8>, c: char) {
            let old_len = buf.len();
            buf.resize(old_len + c.len_utf8(), 0);
            c.encode_utf8(&mut buf[old_len..]);
        }

        match value.next() {
            None => Ok(Cow::Borrowed(b"")),
            Some(Err(e)) => Err(e),
            // The first chunk has no unescaped part, so it is also the only
            // chunk. No allocation needed.
            Some(Ok(UnescapedChunk {
                literal,
                unescaped: None,
            })) => Ok(Cow::Borrowed(literal)),
            // An escape was processed. Must allocate and collect the rest.
            Some(Ok(UnescapedChunk {
                literal,
                unescaped: Some(first_char),
            })) => {
                // `value.bytes` only holds the input *after* the first chunk, so
                // the already-consumed literal and character are added
                // explicitly; the remaining input length serves as the estimate
                // for everything that follows.
                let capacity = literal.len() + first_char.len_utf8() + value.bytes.len();
                let mut buf = Vec::with_capacity(capacity);
                buf.extend_from_slice(literal);
                append_char(&mut buf, first_char);

                for item in value {
                    let chunk = item?;
                    buf.extend_from_slice(chunk.literal);
                    if let Some(c) = chunk.unescaped {
                        append_char(&mut buf, c);
                    }
                }
                Ok(Cow::Owned(buf))
            }
        }
    }
}
649
650/// A wrapper struct for implementing `fmt::Display` on an [`Unescape`] iterator.
651pub struct DisplayUnescape<'a> {
652    inner: Unescape<'a>,
653    lossy: bool,
654}
655
656impl<'a> fmt::Display for DisplayUnescape<'a> {
657    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
658        for chunk_result in self.inner.clone() {
659            match chunk_result {
660                Ok(chunk) => {
661                    let display_chunk = DisplayUnescapedChunk {
662                        chunk: &chunk,
663                        lossy: self.lossy,
664                    };
665                    write!(f, "{}", display_chunk)?;
666                }
667                Err(_) => return Err(fmt::Error), // Signal error to formatter
668            }
669        }
670        Ok(())
671    }
672}
673
674impl<'a> crate::Unescape<'a> {
675    #[inline(always)]
676    pub(crate) fn handle_unicode_escape_from_slice(
677        bytes: &mut &'a [u8],
678    ) -> Result<char, UnescapeError> {
679        let mut iter = bytes.iter();
680        let result = crate::Unescape::handle_unicode_escape(&mut iter);
681        if result.is_ok() {
682            *bytes = iter.as_slice();
683        }
684        result
685    }
686}
687
// Unescape table: maps the byte that follows '\' to the character it stands for.
// Bytes that do not form a single-character escape map to `None`.
const UNESCAPE_TABLE: [Option<char>; 256] = {
    // (byte after the backslash, decoded character)
    const SIMPLE_ESCAPES: [(u8, char); 8] = [
        (b'"', '\"'),
        (b'\\', '\\'),
        (b'/', '/'),
        (b'b', '\x08'),
        (b'f', '\x0C'),
        (b'n', '\n'),
        (b'r', '\r'),
        (b't', '\t'),
    ];

    let mut table: [Option<char>; 256] = [None; 256];
    // `for` loops are not available in const context, hence the manual index.
    let mut idx = 0;
    while idx < SIMPLE_ESCAPES.len() {
        let (escape_byte, decoded) = SIMPLE_ESCAPES[idx];
        table[escape_byte as usize] = Some(decoded);
        idx += 1;
    }
    table
};
701
702//==============================================================================
703// Iterator Trait Implementations
704//==============================================================================
705
#[cfg(feature = "alloc")]
mod iter_traits {
    use super::{EscapedChunk, UnescapedChunk};
    use alloc::string::String;
    use alloc::vec::Vec;

    /// Builds a `String` by concatenating every escaped chunk.
    impl<'a> FromIterator<EscapedChunk<'a>> for String {
        #[inline]
        fn from_iter<I: IntoIterator<Item = EscapedChunk<'a>>>(iter: I) -> String {
            let mut out = String::new();
            Extend::extend(&mut out, iter);
            out
        }
    }

    /// Appends each chunk's literal and then its escape sequence, if any.
    impl<'a> Extend<EscapedChunk<'a>> for String {
        #[inline]
        fn extend<I: IntoIterator<Item = EscapedChunk<'a>>>(&mut self, iter: I) {
            iter.into_iter().for_each(|chunk| {
                self.push_str(chunk.literal);
                if let Some(sequence) = chunk.escaped {
                    self.push_str(sequence);
                }
            });
        }
    }

    /// Builds a byte vector by concatenating every unescaped chunk.
    impl<'a> FromIterator<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn from_iter<I: IntoIterator<Item = UnescapedChunk<'a>>>(iter: I) -> Vec<u8> {
            let mut out = Vec::new();
            Extend::extend(&mut out, iter);
            out
        }
    }

    /// Appends each chunk's literal bytes and then the UTF-8 encoding of its
    /// unescaped character, if any.
    impl<'a> Extend<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn extend<I: IntoIterator<Item = UnescapedChunk<'a>>>(&mut self, iter: I) {
            // Scratch space for encoding one character at a time.
            let mut utf8 = [0u8; 4];
            iter.into_iter().for_each(|chunk| {
                self.extend_from_slice(chunk.literal);
                if let Some(c) = chunk.unescaped {
                    self.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
                }
            });
        }
    }
}
761
#[cfg(test)]
mod tests {
    use super::*;

    impl<'a> EscapedChunk<'a> {
        /// Test-only constructor for an `EscapedChunk`.
        const fn new(literal: &'a str, escaped: Option<&'static str>) -> Self {
            Self { literal, escaped }
        }
    }

    impl<'a> UnescapedChunk<'a> {
        /// Test-only constructor for an `UnescapedChunk`.
        const fn new(literal: &'a [u8], unescaped: Option<char>) -> Self {
            Self { literal, unescaped }
        }
    }

    /// The escaper splits its input at every character that needs escaping.
    #[test]
    fn escape_chunks() {
        let expected = [
            EscapedChunk::new("a", Some(r#"\n"#)),
            EscapedChunk::new("b", Some(r#"\""#)),
            EscapedChunk::new("c", None),
        ];

        let mut it = escape_str("a\nb\"c");
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(it.next(), Some(*want), "Chunk {}", index + 1);
        }
        assert_eq!(it.next(), None, "End of iterator");
    }

    /// The unescaper splits its input at every escape sequence.
    #[test]
    fn unescape_chunks() {
        let expected = [
            UnescapedChunk::new(b"xy", Some('\t')),
            UnescapedChunk::new(b"", Some(' ')),
            UnescapedChunk::new(b"z", None),
        ];

        let mut it = unescape(br"xy\t\u0020z");
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(it.next().unwrap().unwrap(), *want, "Chunk {}", index + 1);
        }
        assert_eq!(it.next(), None, "End of iterator");
    }

    /// Collecting the escaped chunks produces the expected escaped string.
    #[test]
    fn test_escape_against_collected_string() {
        let cases = [
            ("Hello, world!", "Hello, world!"),
            ("a\"b", r#"a\"b"#),
            ("\0", r#"\u0000"#),
            ("path/to/file", r#"path/to/file"#),
        ];
        for (input, want) in cases.iter() {
            assert_eq!(escape_str(input).collect::<String>(), *want);
        }

        // Multi-byte input must be walked without panicking.
        escape_str(r#"Unicode test: éàçüö. Emoji: 😀. More symbols: ❤️✅."#).for_each(|_| {});
    }

    /// Decoding the unescaped chunks produces the expected string.
    #[test]
    fn test_unescape_against_collected_string() {
        let plain = unescape(br"Hello, world!").decode_utf8().unwrap();
        assert_eq!(plain, "Hello, world!");

        let newline = unescape(br"a\nb").decode_utf8().unwrap();
        assert_eq!(newline, "a\nb");

        let surrogate_pair = unescape(br"\uD83D\uDE00").decode_utf8().unwrap();
        assert_eq!(surrogate_pair, "😀");
    }

    /// An invalid escape is reported as an error for that step.
    #[test]
    fn unescape_error_propagation() {
        let mut it = unescape(br"valid\k");

        // The implementation bundles the literal with the result of the escape,
        // so the very first item is already the error for the bad `\k`.
        let first_chunk = it.next().unwrap();
        assert!(first_chunk.is_err());
    }

    // Inspired by and copied from memchr
    #[test]
    fn sync_regression() {
        use core::panic::{RefUnwindSafe, UnwindSafe};

        fn assert_send_sync<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

        // Iterators.
        assert_send_sync::<Unescape<'_>>();
        assert_send_sync::<Escape<'_>>();

        // Chunk types.
        assert_send_sync::<UnescapedChunk<'_>>();
        assert_send_sync::<EscapedChunk<'_>>();
    }
}
json_escape/explicit.rs

json_escape/
explicit.rs