json_escape/
explicit.rs

1//! More explicit and fine-grained iterators for JSON escaping and unescaping.
2//!
3//! This module provides an alternative API to the one in the crate root. While the
4//! root API yields slices (`&str` or `&[u8]`) that represent the final output,
5//! this module's iterators yield "chunk" structs. These structs distinguish between
6//! parts of the input that were processed literally and the specific characters
7//! that were escaped or unescaped.
8//!
9//! This approach offers several advantages:
10//! - **Greater Control**: You can inspect each component of the transformation,
11//!   which can be useful for debugging, logging, or more complex data processing.
12//! - **Potential Performance**: By avoiding the need to look up single-byte escape
13//!   sequences in a table on every iteration, some workflows may see a minor
14//!   performance improvement.
15//! - **Clarity**: The structure of the output more closely reflects the transformation
16//!   process, which can make the logic easier to follow.
17//!
18//! # Example: Escaping
19//!
20//! ```
21//! use json_escape::explicit::escape_str;
22//!
23//! let mut escaper = escape_str("a\nb");
24//!
25//! // The first chunk contains the literal "a" and the escaped newline.
26//! let chunk1 = escaper.next().unwrap();
27//! assert_eq!("a", chunk1.literal());
28//! assert_eq!(Some(r#"\n"#), chunk1.escaped());
29//!
30//! // The second chunk contains the literal "b" and no escaped sequence.
31//! let chunk2 = escaper.next().unwrap();
32//! assert_eq!("b", chunk2.literal());
33//! assert_eq!(None, chunk2.escaped());
34//!
35//! // The iterator is now exhausted.
36//! assert!(escaper.next().is_none());
37//! ```
38//!
39//! # Example: Unescaping
40//!
41//! ```
42//! use json_escape::explicit::unescape;
43//!
44//! let mut unescaper = unescape(br"hello\tworld");
45//!
46//! // The first chunk contains the literal "hello" and the unescaped tab.
47//! let chunk1 = unescaper.next().unwrap().unwrap();
48//! assert_eq!(b"hello", chunk1.literal());
49//! assert_eq!(Some('\t'), chunk1.unescaped());
50//!
51//! // The second chunk contains the literal "world" and no unescaped character.
52//! let chunk2 = unescaper.next().unwrap().unwrap();
53//! assert_eq!(b"world", chunk2.literal());
54//! assert_eq!(None, chunk2.unescaped());
55//!
56//! // The iterator is now exhausted.
57//! assert!(unescaper.next().is_none());
58//! ```
59//!
60//! Both `Escape` and `Unescape` iterators provide `display` helpers for easy integration
61//! with Rust's formatting system, preserving the zero-allocation benefits of the main API.
62
63#[cfg(feature = "alloc")]
64use crate::DecodeUtf8Error;
65use crate::{ESCAPE_TABLE, InvalidHexError, LoneSurrogateError, UnescapeError, display_bytes_utf8};
66use crate::{InvalidEscapeError, UnescapeErrorKind, find_escape_char};
67use core::fmt;
68use core::iter::FusedIterator;
69use core::str;
70
71#[cfg(feature = "alloc")]
72use alloc::{borrow::Cow, string::String, vec::Vec};
73
74//==============================================================================
75// Escaping
76//==============================================================================
77
78/// Creates an iterator that yields chunks of an escaped JSON string.
79///
80/// See the [module-level documentation](self) for more details.
81#[inline]
82pub fn escape_str(s: &str) -> Escape<'_> {
83    Escape {
84        bytes: s.as_bytes(),
85    }
86}
87
/// One step of JSON escaping: a verbatim run of input followed by at most one escape.
///
/// Values of this type are produced by the [`Escape`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EscapedChunk<'a> {
    /// Portion of the original input that is emitted unchanged.
    literal: &'a str,
    /// Escape sequence (for example `r#"\n"#` or `r#"\""#`) that comes right after
    /// `literal`. `None` when the chunk ends the input without a trailing escape.
    escaped: Option<&'static str>,
}

impl<'a> EscapedChunk<'a> {
    /// The verbatim part of this chunk, borrowed from the original string.
    #[inline]
    pub const fn literal(&self) -> &'a str {
        self.literal
    }

    /// The escape sequence that follows the literal part, when there is one.
    #[inline]
    pub const fn escaped(&self) -> Option<&'static str> {
        self.escaped
    }
}

impl<'a> fmt::Display for EscapedChunk<'a> {
    /// Writes the literal part and then, if present, the escape sequence.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.literal)?;
        match self.escaped {
            Some(sequence) => f.write_str(sequence),
            None => Ok(()),
        }
    }
}
124
125/// An iterator over a string that yields [`EscapedChunk`]s.
126///
127/// Created by the [`escape_str`] function.
128#[derive(Clone)]
129#[must_use = "iterators are lazy and do nothing unless consumed"]
130pub struct Escape<'a> {
131    pub(crate) bytes: &'a [u8],
132}
133
134impl<'a> Iterator for Escape<'a> {
135    type Item = EscapedChunk<'a>;
136
137    #[inline]
138    fn next(&mut self) -> Option<Self::Item> {
139        if self.bytes.is_empty() {
140            return None;
141        }
142
143        let pos = find_escape_char(self.bytes).unwrap_or(self.bytes.len());
144        let (literal_bytes, rest) = self.bytes.split_at(pos);
145
146        // SAFETY: `find_escape_char` guarantees `pos` is on a UTF-8 boundary.
147        let literal = unsafe { str::from_utf8_unchecked(literal_bytes) };
148
149        if rest.is_empty() {
150            self.bytes = &self.bytes[self.bytes.len()..];
151            Some(EscapedChunk {
152                literal,
153                escaped: None,
154            })
155        } else {
156            let escaped_char_byte = rest[0];
157            self.bytes = &rest[1..];
158            Some(EscapedChunk {
159                literal,
160                escaped: Some(
161                    ESCAPE_TABLE[escaped_char_byte as usize]
162                        .expect("find_escape_char found a byte not in ESCAPE_TABLE"),
163                ),
164            })
165        }
166    }
167
168    fn size_hint(&self) -> (usize, Option<usize>) {
169        if self.bytes.is_empty() {
170            (0, Some(0))
171        } else {
172            // We'll yield at least 1 chunk, and at most `len` chunks if every byte is escaped.
173            (1, Some(self.bytes.len()))
174        }
175    }
176}
177
178impl<'a> FusedIterator for Escape<'a> {}
179
180impl<'a> fmt::Display for Escape<'a> {
181    /// This allows the escaped output to be written directly to a formatter
182    /// without intermediate allocation.
183    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184        for chunk in self.clone() {
185            write!(f, "{chunk}")?;
186        }
187        Ok(())
188    }
189}
190
191impl fmt::Debug for Escape<'_> {
192    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
193        f.debug_struct("Escape").finish_non_exhaustive()
194    }
195}
196
197impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Escape<'_> {
198    /// Compares the escaped output with any byte-slice-like object.
199    ///
200    /// This is a convenience for testing, allowing you to check the fully
201    /// concatenated result of an `Escape` iterator against a known `&str` or `&[u8]`.
202    fn eq(&self, other: &B) -> bool {
203        let mut other = other.as_ref();
204        for chunk in self.clone() {
205            // Check literal part
206            if !other.starts_with(chunk.literal.as_bytes()) {
207                return false;
208            }
209            other = &other[chunk.literal.len()..];
210
211            // Check escaped part
212            if let Some(escaped_str) = chunk.escaped {
213                if !other.starts_with(escaped_str.as_bytes()) {
214                    return false;
215                }
216                other = &other[escaped_str.len()..];
217            }
218        }
219        other.is_empty()
220    }
221}
222
223impl<'a, 'b> PartialEq<Escape<'a>> for Escape<'b> {
224    /// Compares two `Escape` iterators for equality.
225    ///
226    /// Two `Escape` iterators are considered equal if they'll produce the same **output**.
227    /// It first performs a fast check on the underlying byte slices.
228    fn eq(&self, other: &Escape<'a>) -> bool {
229        // The crate parallel is easier
230        crate::Escape { bytes: self.bytes } == crate::Escape { bytes: other.bytes }
231    }
232}
233
#[cfg(feature = "alloc")]
impl<'a> From<Escape<'a>> for Cow<'a, str> {
    /// Efficiently collects the escaped parts into a `Cow<'a, str>`.
    ///
    /// This implementation avoids allocation when possible:
    /// - If the input requires **no escaping** (or is empty), it returns
    ///   `Cow::Borrowed` with a slice of the original string.
    /// - If escaping is needed, it allocates a `String` and returns `Cow::Owned`.
    fn from(mut iter: Escape<'a>) -> Self {
        let first = match iter.next() {
            Some(chunk) => chunk,
            None => return Cow::Borrowed(""),
        };

        let escaped = match first.escaped {
            Some(sequence) => sequence,
            // A chunk without an escape is only produced when it reaches the end of
            // the input, so the first chunk is also the only one: borrow it as-is.
            None => return Cow::Borrowed(first.literal),
        };

        // Escaping occurred, so we must allocate. The hint covers the chunk that was
        // already consumed as well as the unread input; `iter.bytes` alone no longer
        // includes the first literal. The extra 16 bytes leave room for later escapes.
        let capacity = first.literal.len() + escaped.len() + iter.bytes.len() + 16;
        let mut s = String::with_capacity(capacity);
        s.push_str(first.literal);
        s.push_str(escaped);
        s.extend(iter);
        Cow::Owned(s)
    }
}
261
262//==============================================================================
263// Unescaping
264//==============================================================================
265
266/// Creates an iterator that yields chunks of an unescaped JSON string.
267///
268/// See the [module-level documentation](self) for more details.
269#[inline]
270pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
271    Unescape {
272        bytes: input.as_ref(),
273    }
274}
275
276/// Creates a streaming JSON string unescaper that handles enclosing quotes.
277///
278/// This function is a convenience wrapper around [`unescape`]. If the input byte
279/// slice starts and ends with a double-quote (`"`), the quotes are trimmed
280/// before the content is unescaped.
281///
282/// If the input is not enclosed in quotes, this function behaves identically to
283/// [`unescape`].
284///
285/// # Examples
286///
287/// ```
288/// use json_escape::explicit::unescape_quoted;
289///
290/// // An input string with quotes and an escaped tab.
291/// let bytes = br#""\tline""#;
292/// let mut unescaper = unescape_quoted(bytes);
293///
294/// // The first chunk is the unescaped tab character.
295/// let chunk1 = unescaper.next().unwrap().unwrap();
296/// assert_eq!(b"", chunk1.literal());
297/// assert_eq!(Some('\t'), chunk1.unescaped());
298///
299/// // The second chunk is the literal "line".
300/// let chunk2 = unescaper.next().unwrap().unwrap();
301/// assert_eq!(b"line", chunk2.literal());
302/// assert_eq!(None, chunk2.unescaped());
303///
304/// // The iterator is now exhausted.
305/// assert!(unescaper.next().is_none());
306/// ```
307#[inline]
308pub fn unescape_quoted(bytes: &[u8]) -> Unescape<'_> {
309    let inner = if bytes.len() >= 2 && bytes.first() == Some(&b'"') && bytes.last() == Some(&b'"') {
310        &bytes[1..bytes.len() - 1]
311    } else {
312        bytes
313    };
314    unescape(inner)
315}
316
317/// A chunk of a JSON-unescaped byte slice, separating the literal part from the unescaped character.
318///
319/// This struct is yielded by the [`Unescape`] iterator.
320#[derive(Debug, Clone, Copy, PartialEq, Eq)]
321pub struct UnescapedChunk<'a> {
322    /// A slice of the original input that did not require unescaping.
323    pub(crate) literal: &'a [u8],
324    /// The single character that was unescaped.
325    /// Is `None` if this is the last chunk and it has no trailing unescaped character.
326    pub(crate) unescaped: Option<char>,
327}
328
329impl<'a> UnescapedChunk<'a> {
330    /// Returns the literal part of the chunk, which is a slice of the original bytes.
331    #[inline]
332    pub const fn literal(&self) -> &'a [u8] {
333        self.literal
334    }
335
336    /// Returns the unescaped character, if any.
337    #[inline]
338    pub const fn unescaped(&self) -> Option<char> {
339        self.unescaped
340    }
341
342    /// Returns a displayable wrapper that will format the chunk as a UTF-8 string.
343    ///
344    /// If the literal part of the chunk contains invalid UTF-8 sequences, this
345    /// will result in a `fmt::Error`.
346    pub fn display_utf8(&self) -> DisplayUnescapedChunk<'_> {
347        DisplayUnescapedChunk {
348            chunk: self,
349            lossy: false,
350        }
351    }
352
353    /// Returns a displayable wrapper that will format the chunk as a lossy UTF-8 string.
354    ///
355    /// Any invalid UTF-8 sequences in the literal part of the chunk will be
356    /// replaced with the U+FFFD replacement character.
357    pub fn display_utf8_lossy(&self) -> DisplayUnescapedChunk<'_> {
358        DisplayUnescapedChunk {
359            chunk: self,
360            lossy: true,
361        }
362    }
363}
364
365/// Helper struct for safely displaying an [`UnescapedChunk`].
366pub struct DisplayUnescapedChunk<'a> {
367    chunk: &'a UnescapedChunk<'a>,
368    lossy: bool,
369}
370
371impl<'a> fmt::Display for DisplayUnescapedChunk<'a> {
372    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
373        display_bytes_utf8(self.chunk.literal, f, self.lossy)?;
374        if let Some(c) = self.chunk.unescaped {
375            use fmt::Write as _;
376
377            f.write_char(c)?;
378        }
379        Ok(())
380    }
381}
382
383/// An iterator over a byte slice that yields [`UnescapedChunk`]s.
384///
385/// Created by the [`unescape`] function.
386#[derive(Clone)]
387#[must_use = "iterators are lazy and do nothing unless consumed"]
388pub struct Unescape<'a> {
389    pub(crate) bytes: &'a [u8],
390}
391
392impl<'a> Iterator for Unescape<'a> {
393    type Item = Result<UnescapedChunk<'a>, UnescapeError>;
394
395    #[inline]
396    fn next(&mut self) -> Option<Self::Item> {
397        use memchr::memchr;
398
399        if self.bytes.is_empty() {
400            return None;
401        }
402
403        let pos = match memchr(b'\\', self.bytes) {
404            Some(p) => p,
405            None => {
406                // No more backslashes, yield the rest as a final literal chunk.
407                let chunk = UnescapedChunk {
408                    literal: self.bytes,
409                    unescaped: None,
410                };
411                self.bytes = &self.bytes[self.bytes.len()..]; // fix: totalk
412                return Some(Ok(chunk));
413            }
414        };
415
416        let (literal, rest) = self.bytes.split_at(pos);
417        // rest starts with '\\'
418        let mut remainder = &rest[1..];
419
420        let unescaped_char = match remainder.first() {
421            Some(b'u') => {
422                // Temporarily advance past 'u'
423                remainder = &remainder[1..];
424                // Use a helper from the main unescaper, giving it a mutable slice reference
425                // that it can advance.
426                match Self::handle_unicode_escape(&mut remainder) {
427                    Ok(c) => c,
428                    Err(e) => {
429                        // FIX: handle_unicode_escape_from_slice already handles this for us.
430                        // Adjust offset: error is relative to `\u`, but we need it relative to chunk start.
431                        return Some(Err(e));
432                    }
433                }
434            }
435            Some(&byte) => {
436                remainder = &remainder[1..];
437                match UNESCAPE_TABLE[byte as usize] {
438                    Some(c) => c,
439                    None => {
440                        return Some(Err(UnescapeError {
441                            kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError {
442                                found: byte,
443                            }),
444                            // The invalid character is 1 byte after '\'.
445                            offset: 1,
446                        }));
447                    }
448                }
449            }
450            None => {
451                return Some(Err(UnescapeError {
452                    kind: UnescapeErrorKind::UnexpectedEof,
453                    // EOF occurred 1 byte after '\'.
454                    offset: 1,
455                }));
456            }
457        };
458
459        self.bytes = remainder;
460        Some(Ok(UnescapedChunk {
461            literal,
462            unescaped: Some(unescaped_char),
463        }))
464    }
465
466    fn size_hint(&self) -> (usize, Option<usize>) {
467        if self.bytes.is_empty() {
468            (0, Some(0))
469        } else {
470            // Worst-case is \uXXXX -> 1 byte, so 6 -> 1.
471            (
472                self.bytes.len().saturating_add(1) / 6,
473                Some(self.bytes.len()),
474            )
475        }
476    }
477}
478
479impl<'a> FusedIterator for Unescape<'a> {}
480
481impl<'a> Unescape<'a> {
482    /// Decodes the unescaped byte stream into a UTF-8 string.
483    ///
484    /// This method consumes the iterator and collects all resulting byte chunks
485    /// into a `Cow<[u8]>`, which is then validated as UTF-8. If an unescaping
486    /// error occurs, it's returned immediately. If the final sequence of bytes
487    /// is not valid UTF-8, a UTF-8 error is returned.
488    ///
489    /// This is optimized to return a `Cow::Borrowed` if no escapes were present
490    /// in the input, avoiding allocation.
491    ///
492    /// **Requires the `alloc` feature.**
493    ///
494    /// # Example
495    ///
496    /// ```
497    /// # #[cfg(feature = "alloc")] {
498    /// use json_escape::explicit::unescape;
499    ///
500    /// let input = r#"Emoji: \uD83D\uDE00"#;
501    /// let cow = unescape(input).decode_utf8().unwrap();
502    ///
503    /// assert_eq!(cow, "Emoji: 😀");
504    /// # }
505    /// ```
506    #[cfg(feature = "alloc")]
507    pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
508        match self.try_into().map_err(DecodeUtf8Error::Unescape)? {
509            Cow::Borrowed(bytes) => str::from_utf8(bytes)
510                .map(Cow::Borrowed)
511                .map_err(DecodeUtf8Error::Utf8),
512            Cow::Owned(bytes) => String::from_utf8(bytes)
513                .map(Cow::Owned)
514                .map_err(|e| DecodeUtf8Error::Utf8(e.utf8_error())),
515        }
516    }
517
518    /// Decodes the unescaped byte stream lossily into a UTF-8 string.
519    ///
520    /// This is similar to [`Unescape::decode_utf8`] but replaces any invalid UTF-8 sequences
521    /// with the replacement character (`U+FFFD`) instead of returning an error.
522    ///
523    /// An `UnescapeError` can still be returned if the JSON escaping itself is invalid.
524    ///
525    /// **Requires the `alloc` feature.**
526    #[cfg(feature = "alloc")]
527    pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
528        use crate::decode_utf8_lossy;
529
530        Ok(decode_utf8_lossy(self.try_into()?))
531    }
532
533    /// Returns a wrapper that implements [`fmt::Display`].
534    ///
535    /// If an unescaping error or invalid UTF-8 sequence is encountered,
536    /// a `fmt::Error` is returned, which will cause `format!` and friends to panic.
537    pub fn display_utf8(self) -> DisplayUnescape<'a> {
538        DisplayUnescape {
539            inner: self,
540            lossy: false,
541        }
542    }
543
544    /// Returns a wrapper that implements [`fmt::Display` for lossy UTF-8 decoding.
545    ///
546    /// Invalid UTF-8 sequences will be replaced with the replacement character.
547    /// An unescaping error will still result in a `fmt::Error`.
548    pub fn display_utf8_lossy(self) -> DisplayUnescape<'a> {
549        DisplayUnescape {
550            inner: self,
551            lossy: true,
552        }
553    }
554
555    /// Parses a unicode escape sequence `\uXXXX` which may be a surrogate pair.
556    /// The input slice `bytes` must be positioned *after* the `\u`.
557    ///
558    /// On success, returns the parsed `char` and advances the slice.
559    /// On error, returns an `Err` and the input slice is not modified.
560    #[inline(always)]
561    fn handle_unicode_escape(bytes: &mut &'a [u8]) -> Result<char, UnescapeError> {
562        // Parse first 4 hex digits (\uXXXX)
563        //
564        // The slice starts *after* '\u'. The first hex digit is at offset 2 from '\'.
565        let first = Self::parse_hex4(bytes, 2)?;
566
567        // High surrogate → must be followed by another \uXXXX low surrogate
568        if (0xD800..=0xDBFF).contains(&first) {
569            let remaining = &bytes[4..];
570
571            const N: usize = b"\\u".len();
572
573            // EOF before even seeing '\' or 'u' → UnexpectedEof
574            if remaining.len() < N {
575                return Err(UnescapeError {
576                    kind: UnescapeErrorKind::UnexpectedEof,
577                    offset: 6,
578                });
579            }
580
581            // Check for a following `\u` and enough bytes for the second hex sequence.
582            if b"\\u" == &remaining[..N] {
583                // Try parsing the low surrogate. The slice is advanced by 2 for the `\u`.
584                // The first hex digit of the second escape is at offset 8.
585                // (\uXXXX\u -> 8 chars from the initial '\')
586                match Self::parse_hex4(&remaining[2..], 8) {
587                    Ok(low) if (0xDC00..=0xDFFF).contains(&low) => {
588                        // We found a valid low surrogate. Combine them.
589                        let high_t = first as u32;
590                        let low_t = low as u32;
591                        let code = 0x10000 + (((high_t - 0xD800) << 10) | (low_t - 0xDC00));
592                        let result_char = char::from_u32(code)
593                            .expect("valid surrogate pair math should always produce a valid char");
594
595                        // SUCCESS: Advance the original slice past the entire surrogate pair (\uXXXX\uXXXX).
596                        *bytes = &remaining[6..]; // Consumes 4 + 2 + 4 = 10 bytes total from the original slice
597                        return Ok(result_char);
598                    }
599                    Ok(_) => {
600                        // Got a full escape but not a low surrogate → Lone surrogate
601                        return Err(UnescapeError {
602                            kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError {
603                                surrogate: first,
604                            }),
605                            offset: 6,
606                        });
607                    }
608                    Err(err) => {
609                        // parse_hex4 failed for the second part.
610                        return Err(err);
611                    }
612                }
613            } else {
614                // High surrogate was not followed by a `\u` sequence.
615                return Err(UnescapeError {
616                    kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError { surrogate: first }),
617                    // The error is detected after consuming `\uXXXX` (6 bytes total from '\').
618                    offset: 6,
619                });
620            }
621        }
622
623        // Not a surrogate → normal path
624        match char::from_u32(first as u32) {
625            Some(c) => {
626                // SUCCESS: Advance the original slice past the 4 hex digits.
627                *bytes = &bytes[4..];
628                Ok(c)
629            }
630            None => Err(UnescapeError {
631                // The parsed value is not a valid char (e.g., a lone low surrogate).
632                kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError { surrogate: first }),
633                // The error is detected after consuming `\uXXXX` (6 bytes total from '\').
634                offset: 6,
635            }),
636        }
637    }
638
639    /// Parses 4 hex digits, optimized for the success path.
640    #[inline(always)]
641    fn parse_hex4(slice: &[u8], base_offset: u8) -> Result<u16, UnescapeError> {
642        // --- HOT PATH ---
643        // This is the path we expect to take most of the time.
644        if let Some(chunk) = slice.get(..4) {
645            // By slicing to 4, we've performed a single bounds check.
646            // The compiler now knows any access from chunk[0] to chunk[3] is safe,
647            // so it will not generate additional bounds checks.
648
649            // We can now safely access the bytes.
650            let b0 = chunk[0];
651            let b1 = chunk[1];
652            let b2 = chunk[2];
653            let b3 = chunk[3];
654
655            // Use the LUT to get the values.
656            if let (Some(v0), Some(v1), Some(v2), Some(v3)) = (
657                HEX[b0 as usize],
658                HEX[b1 as usize],
659                HEX[b2 as usize],
660                HEX[b3 as usize],
661            ) {
662                // All characters are valid hex, combine and return.
663                let result = (v0 as u16) << 12 | (v1 as u16) << 8 | (v2 as u16) << 4 | (v3 as u16);
664                return Ok(result);
665            }
666
667            // If we're here, it means the slice was long enough, but one
668            // of the characters was not a valid hex digit. Fall through to the cold path
669            // to correctly identify which character was invalid.
670        }
671
672        // --- COLD PATH ---
673        // This path handles all errors. It's marked as `#[cold]` to hint to the
674        // compiler that it's less frequently executed.
675        #[cold]
676        fn handle_error(slice: &[u8], base_offset: u8) -> UnescapeError {
677            // Loop through the bytes we *do* have.
678            for (i, &b) in slice.iter().enumerate() {
679                if HEX[b as usize].is_none() {
680                    // We found an invalid hex character before running out of bytes.
681                    return UnescapeError {
682                        kind: UnescapeErrorKind::InvalidHex(InvalidHexError { found: b }),
683                        offset: base_offset + i as u8,
684                    };
685                }
686            }
687
688            // If the loop completes, all available characters were valid,
689            // but there weren't enough of them.
690            UnescapeError {
691                kind: UnescapeErrorKind::UnexpectedEof,
692                // The error is at the position of the first *missing* character.
693                offset: base_offset + slice.len() as u8,
694            }
695        }
696
697        Err(handle_error(slice, base_offset))
698    }
699}
700
701impl fmt::Debug for Unescape<'_> {
702    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
703        f.debug_struct("Unescape").finish_non_exhaustive()
704    }
705}
706
707impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Unescape<'_> {
708    /// Compares the unescaped output with a byte-slice-like object.
709    ///
710    /// Returns `true` if the iterator successfully unescapes to produce a byte
711    /// sequence identical to `other`. If an error occurs, returns `false`.
712    fn eq(&self, other: &B) -> bool {
713        let mut other = other.as_ref();
714        let mut char_buf = [0u8; 4];
715
716        for result in self.clone() {
717            match result {
718                Ok(chunk) => {
719                    // Check literal part
720                    if !other.starts_with(chunk.literal) {
721                        return false;
722                    }
723                    other = &other[chunk.literal.len()..];
724
725                    // Check unescaped part
726                    if let Some(c) = chunk.unescaped {
727                        let char_bytes = c.encode_utf8(&mut char_buf);
728                        if !other.starts_with(char_bytes.as_bytes()) {
729                            return false;
730                        }
731                        other = &other[char_bytes.len()..];
732                    }
733                }
734                Err(_) => return false, // An erroring iterator cannot be equal.
735            }
736        }
737        other.is_empty()
738    }
739}
740
741impl<B: AsRef<[u8]>> PartialEq<Unescape<'_>> for Result<B, UnescapeError> {
742    /// Compares the unescaper's outcome with a `Result`.
743    ///
744    /// This allows for precise testing of `Unescape` against either a
745    /// successful outcome (`Ok(bytes)`) or a specific failure (`Err(error)`).
746    fn eq(&self, unescape: &Unescape<'_>) -> bool {
747        match self {
748            Ok(expected_bytes) => unescape == expected_bytes,
749            Err(expected_error) => {
750                for result in unescape.clone() {
751                    if let Err(actual_error) = result {
752                        // The iterator's first error is its final outcome.
753                        return actual_error == *expected_error;
754                    }
755                }
756                // `unescape` completed successfully, but an error was expected.
757                false
758            }
759        }
760    }
761}
762
763impl<'a, 'b> PartialEq<Unescape<'a>> for Unescape<'b> {
764    /// Compares two `Unescape` iterators for equality based on their terminal result.
765    ///
766    /// The equality of two `Unescape` iterators is determined by the final `Result`
767    /// that would be obtained if each iterator were fully consumed (e.g., by using `try_collect()`).
768    ///
769    /// The specific rules are as follows:
770    ///
771    /// 1.  **Error vs. Error**: If both iterators terminate with an `Err`, they are
772    ///     considered **equal** if and only if their `UnescapeError`s are identical.
773    ///     Any bytes successfully unescaped *before* the error are ignored in this case.
774    /// 2.  **Success vs. Success**: If both iterators terminate with `Ok`, they are
775    ///     considered **equal** if and only if the complete sequence of unescaped bytes
776    ///     is identical for both.
777    /// 3.  **Success vs. Error**: If one iterator terminates with `Ok` and the other
778    ///     with `Err`, they are always **not equal**.
779    ///
780    /// # Example
781    ///
782    /// ```
783    /// use json_escape::explicit::unescape;
784    ///
785    /// // Case 1: Both iterators produce the same error. They are equal,
786    /// // even though their valid prefixes ("a" and "b") are different.
787    /// let failing_a = unescape(r#"a\k"#);
788    /// let failing_b = unescape(r#"b\k"#);
789    /// assert_eq!(failing_a, failing_b);
790    ///
791    /// // Case 2: Both iterators succeed. Equality depends on the byte stream.
792    /// let successful_a = unescape(r#"hello\nworld"#);
793    /// let successful_b = unescape(r#"hello\nworld"#);
794    /// assert_eq!(successful_a, successful_b);
795    ///
796    /// let successful_c = unescape(r#"different"#);
797    /// assert_ne!(successful_a, successful_c);
798    ///
799    /// // Case 3: One succeeds and one fails. They are not equal.
800    /// let succeeding = unescape(r#"stop"#);
801    /// let failing = unescape(r#"stop\k"#);
802    /// assert_ne!(succeeding, failing);
803    ///
804    /// // Case 4: Both iterators fail differently. They are not equal.
805    /// let failing_a = unescape(r#"data:\k"#);
806    /// let failing_b = unescape(r#"data:\"#);
807    /// assert_ne!(failing_a, failing_b);
808    /// ```
809    fn eq(&self, other: &Unescape<'a>) -> bool {
810        // The crate parallel is easier
811        crate::unescape(self.bytes) == crate::unescape(other.bytes)
812    }
813}
814
#[cfg(feature = "alloc")]
impl<'a> TryFrom<Unescape<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Gathers every unescaped byte produced by `value` into a `Cow<'a, [u8]>`.
    ///
    /// When the iterator is empty, or its first chunk carries no unescaped
    /// character, the result is a `Cow::Borrowed` and nothing is allocated.
    /// Otherwise the chunks are copied into a freshly allocated buffer and
    /// returned as `Cow::Owned`.
    ///
    /// # Errors
    ///
    /// The first `UnescapeError` yielded by the iterator is returned as-is;
    /// anything collected up to that point is discarded.
    fn try_from(mut value: Unescape<'a>) -> Result<Self, Self::Error> {
        /// Appends the UTF-8 encoding of `c` to `out`, writing straight into
        /// the vector's own storage.
        fn push_char(out: &mut Vec<u8>, c: char) {
            let start = out.len();
            out.resize(start + c.len_utf8(), 0);
            c.encode_utf8(&mut out[start..]);
        }

        // Nothing to yield at all: borrow an empty slice.
        let first = match value.next() {
            Some(head) => head?,
            None => return Ok(Cow::Borrowed(b"")),
        };

        // A first chunk without an unescaped character is handed back as a
        // borrow of its literal; no buffer is needed.
        let first_char = match first.unescaped {
            Some(c) => c,
            None => return Ok(Cow::Borrowed(first.literal)),
        };

        // An escape was decoded, so the output has to be owned from here on.
        let mut out = Vec::with_capacity(value.bytes.len() + 16);
        out.extend_from_slice(first.literal);
        push_char(&mut out, first_char);

        for item in value {
            let chunk = item?;
            out.extend_from_slice(chunk.literal);
            if let Some(c) = chunk.unescaped {
                push_char(&mut out, c);
            }
        }

        Ok(Cow::Owned(out))
    }
}
864
865/// A wrapper struct for implementing `fmt::Display` on an [`Unescape`] iterator.
866pub struct DisplayUnescape<'a> {
867    inner: Unescape<'a>,
868    lossy: bool,
869}
870
871impl<'a> fmt::Display for DisplayUnescape<'a> {
872    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
873        for chunk_result in self.inner.clone() {
874            match chunk_result {
875                Ok(chunk) => {
876                    let display_chunk = DisplayUnescapedChunk {
877                        chunk: &chunk,
878                        lossy: self.lossy,
879                    };
880                    write!(f, "{}", display_chunk)?;
881                }
882                Err(_) => return Err(fmt::Error), // Signal error to formatter
883            }
884        }
885        Ok(())
886    }
887}
888
// Unescape table: indexed by the byte that follows a '\', each slot holds the
// character that single-byte escape stands for, or `None` when the byte does
// not name a single-byte escape.
const UNESCAPE_TABLE: [Option<char>; 256] = {
    let mut table: [Option<char>; 256] = [None; 256];
    let mut byte = 0usize;
    while byte < 256 {
        table[byte] = match byte as u8 {
            b'"' => Some('"'),
            b'\\' => Some('\\'),
            b'/' => Some('/'),
            b'b' => Some('\u{08}'),
            b'f' => Some('\u{0C}'),
            b'n' => Some('\n'),
            b'r' => Some('\r'),
            b't' => Some('\t'),
            _ => None,
        };
        byte += 1;
    }
    table
};
902
// --- Look-Up Table for Hex Decoding ---
// Indexed by an ASCII byte; holds the value (0..=15) of that byte read as a
// hexadecimal digit, or `None` when the byte is not a hex digit.
const HEX: [Option<u8>; 256] = {
    let mut table: [Option<u8>; 256] = [None; 256];

    // Decimal digits '0'..='9' map to 0..=9.
    let mut digit = 0u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = Some(digit);
        digit += 1;
    }

    // Letters 'a'..='f' and 'A'..='F' both map to 10..=15.
    let mut offset = 0u8;
    while offset < 6 {
        table[(b'a' + offset) as usize] = Some(10 + offset);
        table[(b'A' + offset) as usize] = Some(10 + offset);
        offset += 1;
    }

    table
};
918
919//==============================================================================
920// Iterator Trait Implementations
921//==============================================================================
922
#[cfg(feature = "alloc")]
mod iter_traits {
    use super::{EscapedChunk, UnescapedChunk};
    use alloc::string::String;
    use alloc::vec::Vec;

    /// Builds a `String` by concatenating every escaped chunk in order.
    impl<'a> FromIterator<EscapedChunk<'a>> for String {
        #[inline]
        fn from_iter<I>(iter: I) -> String
        where
            I: IntoIterator<Item = EscapedChunk<'a>>,
        {
            let mut out = String::new();
            Extend::extend(&mut out, iter);
            out
        }
    }

    /// Appends escaped chunks to an existing `String`: for each chunk, the
    /// literal part first, then the escape sequence when there is one.
    impl<'a> Extend<EscapedChunk<'a>> for String {
        #[inline]
        fn extend<I>(&mut self, iter: I)
        where
            I: IntoIterator<Item = EscapedChunk<'a>>,
        {
            for chunk in iter {
                self.push_str(chunk.literal);
                if let Some(sequence) = chunk.escaped {
                    self.push_str(sequence);
                }
            }
        }
    }

    /// Builds a byte vector by concatenating every unescaped chunk in order.
    impl<'a> FromIterator<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn from_iter<I>(iter: I) -> Vec<u8>
        where
            I: IntoIterator<Item = UnescapedChunk<'a>>,
        {
            let mut out = Vec::new();
            Extend::extend(&mut out, iter);
            out
        }
    }

    /// Appends unescaped chunks to an existing byte vector: for each chunk,
    /// the literal bytes first, then the UTF-8 encoding of the unescaped
    /// character when there is one.
    impl<'a> Extend<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn extend<I>(&mut self, iter: I)
        where
            I: IntoIterator<Item = UnescapedChunk<'a>>,
        {
            for chunk in iter {
                self.extend_from_slice(chunk.literal);
                if let Some(c) = chunk.unescaped {
                    // A `char` never needs more than four UTF-8 bytes.
                    let mut utf8 = [0u8; 4];
                    self.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
                }
            }
        }
    }
}
978
#[cfg(test)]
mod tests {
    use super::*;

    impl<'a> EscapedChunk<'a> {
        /// Test-only constructor for an `EscapedChunk`.
        const fn new(literal: &'a str, escaped: Option<&'static str>) -> Self {
            EscapedChunk { literal, escaped }
        }
    }

    impl<'a> UnescapedChunk<'a> {
        /// Test-only constructor for an `UnescapedChunk`.
        const fn new(literal: &'a [u8], unescaped: Option<char>) -> Self {
            UnescapedChunk { literal, unescaped }
        }
    }

    #[test]
    fn escape_chunks() {
        let expected = [
            EscapedChunk::new("a", Some(r#"\n"#)),
            EscapedChunk::new("b", Some(r#"\""#)),
            EscapedChunk::new("c", None),
        ];

        let mut chunks = escape_str("a\nb\"c");
        for (index, want) in expected.iter().enumerate() {
            let got = chunks.next();
            assert_eq!(got.as_ref(), Some(want), "Chunk {}", index + 1);
        }
        assert_eq!(chunks.next(), None, "End of iterator");
    }

    #[test]
    fn unescape_chunks() {
        let expected = [
            UnescapedChunk::new(b"xy", Some('\t')),
            UnescapedChunk::new(b"", Some(' ')),
            UnescapedChunk::new(b"z", None),
        ];

        let mut chunks = unescape(br"xy\t\u0020z");
        for (index, want) in expected.iter().enumerate() {
            let got = chunks.next().unwrap().unwrap();
            assert_eq!(&got, want, "Chunk {}", index + 1);
        }
        assert_eq!(chunks.next(), None, "End of iterator");
    }

    #[test]
    fn test_escape_against_collected_string() {
        // (input, expected escaped output)
        let cases = [
            ("Hello, world!", "Hello, world!"),
            ("a\"b", r#"a\"b"#),
            ("\0", r#"\u0000"#),
            ("path/to/file", r#"path/to/file"#),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(escape_str(input).collect::<String>(), expected);
        }

        // Non-ASCII input only has to be consumable without panicking.
        for _ in escape_str(r#"Unicode test: éàçüö. Emoji: 😀. More symbols: ❤️✅."#) {}
    }

    #[test]
    fn test_unescape_against_collected_string() {
        let plain = unescape(br"Hello, world!").decode_utf8().unwrap();
        assert_eq!(plain, "Hello, world!");

        let with_newline = unescape(br"a\nb").decode_utf8().unwrap();
        assert_eq!(with_newline, "a\nb");

        let surrogate_pair = unescape(br"\uD83D\uDE00").decode_utf8().unwrap();
        assert_eq!(surrogate_pair, "😀");
    }

    #[test]
    fn unescape_error_propagation() {
        // The literal prefix is bundled with the outcome of the escape that
        // follows it, so an invalid escape makes the very first item an error
        // rather than a chunk holding "valid".
        let first_item = unescape(br"valid\k").next().unwrap();
        assert!(matches!(first_item, Err(UnescapeError { .. })));
    }

    // Inspired by and copied from memchr
    #[test]
    fn sync_regression() {
        use core::panic::{RefUnwindSafe, UnwindSafe};

        fn require_auto_traits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

        // Iterators.
        require_auto_traits::<Unescape<'_>>();
        require_auto_traits::<Escape<'_>>();

        // Chunks they yield.
        require_auto_traits::<UnescapedChunk<'_>>();
        require_auto_traits::<EscapedChunk<'_>>();
    }
}
json_escape/explicit.rs

json_escape/
explicit.rs