json_escape/
lib.rs

1//! # Streaming JSON String Escape/Unescape
2//!
3//! Welcome to a highly efficient, `no_std` compatible library for handling JSON string escaping and unescaping. This crate provides iterator-based tools that process strings on the fly, avoiding heap allocations for the entire result. It's designed for performance-critical applications, such as parsing large JSON files or working in memory-constrained environments. ⚡
4//!
5//! The core of the library is two iterator structs:
6//! - **[`Escape`]**: Takes a string slice (`&str`) and yields escaped string slices ready for JSON serialization.
7//! - **[`Unescape`]**: Takes a byte slice (`&[u8]`) representing the content of a JSON string and yields the decoded byte slices.
8//!
9//! ## Key Features
10//! - **Zero-Copy Slicing**: For sequences of characters that don't need modification, the iterators yield slices that borrow directly from the input, avoiding unnecessary data copying.
11//! - **Comprehensive JSON Support**: Correctly handles all standard JSON escapes: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`.
12//! - **Full Unicode Handling**: Correctly decodes `\uXXXX` sequences, including full support for UTF-16 surrogate pairs (e.g., `\uD83D\uDE00` for `😀`).
13//! - **Robust Error Handling**: The `Unescape` iterator returns descriptive errors (`UnescapeError`) for invalid or truncated escape sequences, making debugging straightforward.
14//! - **Allocation Control** (with `alloc` feature): Provides convenient methods to collect the iterator's output into owned types like `String` or `Cow<str>`.
15//! - **`std::io` Integration** (with `std` feature): The `Unescape` iterator implements `std::io::Read`, allowing it to be used as an efficient reader for I/O streams.
16//!
17//! ## Quick Start: Escaping a String
18//!
19//! ```
20//! use json_escape::escape_str;
21//!
22//! let input = "Hello, \"world\"!\nThis contains a \\ backslash.";
23//! let expected = r#"Hello, \"world\"!\nThis contains a \\ backslash."#;
24//!
25//! // The `escape_str` function returns an iterator.
26//! let mut escaper = escape_str(input);
27//!
28//! // You can iterate over the chunks:
29//! assert_eq!(escaper.next(), Some("Hello, "));
30//! assert_eq!(escaper.next(), Some(r#"\""#));
31//! assert_eq!(escaper.next(), Some("world"));
32//! // ...and so on.
33//!
34//! // Or, collect it into a String (requires the "alloc" feature).
35//! // let escaped_string: String = escape_str(input).collect();
36//! // assert_eq!(escaped_string, expected);
37//! ```
38//!
39//! ## Quick Start: Unescaping a String
40//!
41//! ```
42//! use json_escape::unescape;
43//!
44//! let input = r#"A 😀 emoji: \uD83D\uDE00 and a tab\t!"#;
45//!
46//! // The unescape iterator yields `Result<&[u8], _>`.
47//! let unescaper = unescape(input);
48//!
49//! // With the "alloc" feature, you can decode it directly into a string.
50//! let decoded_cow = unescaper.decode_utf8().unwrap();
51//! assert_eq!(decoded_cow, "A 😀 emoji: 😀 and a tab\t!");
52//! ```
53//!
54//! ## Performance and the `explicit` Module
55//!
56//! This crate is designed for high-performance, zero-allocation escaping and
57//! unescaping. For most use cases, the functions in this root module provide the
58//! best balance of ergonomics and speed.
59//!
60//! However, for users with extreme performance requirements, the [`explicit`]
61//! module is provided. Its iterators yield structured `Chunk` data instead of
62//! simple slices. As shown by benchmarks, this approach can be slightly faster,
63//! especially on inputs with a high density of escape sequences. If you are
64//! processing a very large volume of JSON strings in a tight loop, consider
65//! using the `explicit` module for a potential performance boost.
66#![no_std]
67#![deny(missing_docs)]
68#![cfg_attr(all(feature = "simd", nightly), feature(portable_simd))]
69
70#[cfg(any(test, feature = "std"))]
71extern crate std;
72
// `alloc` has to be linked in every configuration where the
// `use alloc::{borrow::Cow, string::String, vec::Vec}` import is compiled,
// which includes test builds as well as builds with the `alloc` feature.
#[cfg(any(test, feature = "alloc"))]
extern crate alloc;
75
76#[cfg(any(test, feature = "alloc"))]
77use alloc::{borrow::Cow, string::String, vec::Vec};
78
79use core::{
80    char,
81    fmt::{self, Write as _},
82    iter::FusedIterator,
83    str,
84};
85
86pub mod explicit;
87
88// =============================================================================
89// Escape Implementation
90// =============================================================================
91
92/// Creates a streaming JSON string escaper from a string slice.
93///
94/// The returned [`Escape`] iterator lazily processes the input string, yielding
95/// slices that represent the escaped output.
96///
97/// # Examples
98///
99/// ```
100/// use json_escape::escape_str;
101///
102/// let escaper = escape_str("a\nb");
103/// let escaped_parts: Vec<_> = escaper.collect();
104///
105/// assert_eq!(escaped_parts, vec!["a", r#"\n"#, "b"]);
106/// ```
107#[inline]
108pub fn escape_str(input: &str) -> Escape<'_> {
109    Escape {
110        bytes: input.as_bytes(),
111    }
112}
113
/// Lazy JSON string escaper producing `&'a str` pieces.
///
/// Values of this type come from [`escape_str`]. Iterating splits the input at
/// every byte that has to be escaped:
///
/// - a run of bytes that needs no escaping is returned as one slice borrowed
///   from the input;
/// - each byte that must be escaped is returned as its escaped spelling taken
///   from a lookup table (for example `r#"\n"#`).
///
/// The full escaped output is never materialised; it only exists as the
/// sequence of pieces the iterator hands out.
///
/// ### Implemented Traits
/// - **`Iterator<Item = &'a str>`**: walk the escaped pieces in a loop or with adapters.
/// - **`Display`**: write the escaped text straight into a formatter, piece by piece.
/// - **`Clone`**, **`Debug`**: standard utility traits.
/// - **`PartialEq`**, **`PartialEq<B: AsRef<[u8]>>`**: equality is defined on the
///   escaped *output*, either against another `Escape` or against a byte-slice-like value.
/// - **`From<Escape<'a>> for Cow<'a, str>`** (requires the `alloc` feature): gather the
///   output into a string that is borrowed when a single piece makes up the whole output.
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone)]
pub struct Escape<'a> {
    /// Input bytes that have not been consumed yet.
    bytes: &'a [u8],
}
138
139impl<'a> Iterator for Escape<'a> {
140    type Item = &'a str;
141
142    #[inline]
143    fn next(&mut self) -> Option<&'a str> {
144        if self.bytes.is_empty() {
145            return None;
146        }
147
148        // Find the first byte that needs escaping.
149        let pos = find_escape_char(self.bytes);
150
151        match pos {
152            // No escapable characters left; return the rest of the slice.
153            None => {
154                let s = self.bytes;
155                self.bytes = &self.bytes[self.bytes.len()..];
156                // SAFETY: The input was a valid &str, and we're returning the
157                // whole remaining chunk, so it's still valid UTF-8.
158                Some(unsafe { str::from_utf8_unchecked(s) })
159            }
160            // An escapable byte is at the beginning of the slice.
161            Some(0) => {
162                let byte = self.bytes[0];
163                self.bytes = &self.bytes[1..];
164                // The table lookup gives us a &'static str, which is a valid &'a str.
165                //
166                // Some(....unwrap()) is more correct
167                ESCAPE_TABLE[byte as usize]
168            }
169            // Found an escapable byte after a safe prefix. Return the prefix.
170            Some(p) => {
171                let (prefix, rest) = self.bytes.split_at(p);
172                self.bytes = rest;
173                // SAFETY: The soundness of this operation is critical.
174                // We are splitting the byte slice at the position of the first
175                // character that requires escaping. All JSON characters that
176                // require escaping (`"`, `\`, and control characters `\u0000`-`\u001F`)
177                // are single-byte ASCII characters. Therefore, `p` is guaranteed
178                // to be on a valid UTF-8 character boundary.
179                Some(unsafe { str::from_utf8_unchecked(prefix) })
180            }
181        }
182    }
183
184    fn size_hint(&self) -> (usize, Option<usize>) {
185        if self.bytes.is_empty() {
186            (0, Some(0))
187        } else {
188            // We'll yield at least 1 slice, and at most `len` slices if every byte is escaped.
189            (1, Some(self.bytes.len()))
190        }
191    }
192}
193
194impl<'a> FusedIterator for Escape<'a> {}
195
196impl fmt::Display for Escape<'_> {
197    /// Allows direct formatting of the escaped string without intermediate allocation.
198    ///
199    /// This is very useful for writing the escaped output directly to a stream,
200    /// such as a file or a network socket.
201    ///
202    /// # Example
203    ///
204    /// ```
205    /// use json_escape::escape_str;
206    ///
207    /// let escaper = escape_str("User said: \"Hi!\"\n");
208    /// let formatted = format!("{}", escaper);
209    ///
210    /// assert_eq!(formatted, r#"User said: \"Hi!\"\n"#);
211    /// ```
212    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213        // The `clone()` is cheap as it only copies a slice reference.
214        for s in self.clone() {
215            f.write_str(s)?
216        }
217        Ok(())
218    }
219}
220
221impl fmt::Debug for Escape<'_> {
222    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
223        f.debug_struct("Escape").finish_non_exhaustive()
224    }
225}
226
227impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Escape<'_> {
228    /// Compares the escaped output with any byte-slice-like object.
229    ///
230    /// This is primarily a convenience for testing, allowing you to check the
231    /// fully concatenated result of an `Escape` iterator against a known `&str` or `&[u8]`.
232    ///
233    /// The notion of equality is based on the **output**, not the iterator's internal state.
234    ///
235    /// # Example
236    ///
237    /// ```
238    /// use json_escape::escape_str;
239    ///
240    /// let escaper = escape_str("key\tvalue");
241    ///
242    /// // The escaper's output, when concatenated, equals the right-hand side.
243    /// assert_eq!(escaper, r#"key\tvalue"#);
244    /// ```
245    fn eq(&self, other: &B) -> bool {
246        let mut other = other.as_ref();
247        for chunk in self.clone() {
248            if !other.starts_with(chunk.as_bytes()) {
249                return false;
250            }
251            other = &other[chunk.len()..];
252        }
253        // We completely searched it
254        other.is_empty()
255    }
256}
257
258impl<'a, 'b> PartialEq<Escape<'a>> for Escape<'b> {
259    /// Compares two `Escape` iterators for equality.
260    ///
261    /// Two `Escape` iterators are considered equal if they'll produce the same **output**.
262    /// It first performs a fast check on the underlying byte slices.
263    fn eq(&self, other: &Escape<'a>) -> bool {
264        // Fast path: if they are views into the same underlying data.
265        self.bytes == other.bytes || chunks_eq(self.clone(), other.clone())
266    }
267}
268
#[cfg(feature = "alloc")]
impl<'a> From<Escape<'a>> for Cow<'a, str> {
    /// Gathers the escaped pieces into a `Cow<'a, str>`, allocating only when needed.
    ///
    /// - No output at all yields `Cow::Borrowed("")`.
    /// - If the whole output is a single piece (for instance an input that
    ///   needs **no escaping**), that piece is returned as `Cow::Borrowed`.
    /// - As soon as there is a second piece, the pieces are concatenated into
    ///   a freshly allocated `String` and returned as `Cow::Owned`.
    ///
    /// Unlike `iter.collect::<String>()`, this can therefore return without
    /// building a new string.
    ///
    /// **Requires the `alloc` feature.**
    ///
    /// # Example
    ///
    /// ```
    /// # #[cfg(feature = "alloc")] {
    /// use json_escape::escape_str;
    /// use std::borrow::Cow;
    ///
    /// // No escaping needed, so no allocation occurs.
    /// let cow_borrowed: Cow<str> = escape_str("plain text").into();
    /// assert!(matches!(cow_borrowed, Cow::Borrowed(_)));
    ///
    /// // Escaping is required, so a new String is allocated.
    /// let cow_owned: Cow<str> = escape_str("text with\nnewline").into();
    /// assert!(matches!(cow_owned, Cow::Owned(_)));
    /// assert_eq!(cow_owned, r#"text with\nnewline"#);
    /// # }
    /// ```
    fn from(mut iter: Escape<'a>) -> Self {
        let first = match iter.next() {
            Some(piece) => piece,
            None => return Cow::Borrowed(""),
        };
        let second = match iter.next() {
            Some(piece) => piece,
            // A single piece is the entire output; hand it back as is.
            None => return Cow::Borrowed(first),
        };

        // Reserve room for the two pieces already pulled plus one byte per
        // remaining input byte.
        let mut owned = String::with_capacity(first.len() + second.len() + iter.bytes.len());
        owned.push_str(first);
        owned.push_str(second);
        for piece in iter {
            owned.push_str(piece);
        }
        Cow::Owned(owned)
    }
}
317
318// =============================================================================
319// Unescape Implementation
320// =============================================================================
321
322/// Creates a streaming JSON string unescaper from a byte slice.
323///
324/// This function creates an iterator to unescape a byte slice representing the
325/// **raw contents** of a JSON string, assuming the outer quotes have already
326/// been removed.
327///
328/// For a more convenient way to handle complete JSON string literals (including
329/// their surrounding `"` quotes), see the [`unescape_quoted`] function, which
330/// automatically trims them.
331///
332/// The iterator will fail if the input contains invalid JSON escape sequences.
333///
334/// # Example
335///
336/// ```
337/// use json_escape::{unescape, unescape_quoted};
338///
339/// // `unescape` works on the raw content, without quotes.
340/// let content = r#"hello\tworld"#;
341/// assert_eq!(unescape(content), "hello\tworld");
342///
343/// // If you pass a full JSON literal, the quotes are treated as literal characters.
344/// let literal = r#""hello\tworld""#;
345/// assert_eq!(unescape(literal), "\"hello\tworld\""); // Note the quotes in the output.
346///
347/// // For full literals like this, `unescape_quoted` is the recommended function.
348/// assert_eq!(unescape_quoted(literal), "hello\tworld");
349/// ```
350#[inline]
351pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
352    Unescape::new(input.as_ref())
353}
354
355/// Creates a streaming JSON string unescaper, trimming enclosing quotes.
356///
357/// This function acts as a convenience wrapper around [`unescape`]. It first
358/// inspects the input byte slice. If the slice begins and ends with a double-quote
359/// character (`"`), these quotes are trimmed before the inner content is passed to
360/// the unescaper.
361///
362/// If the input is not enclosed in quotes, this function behaves exactly like
363/// [`unescape`]. This is useful for directly unescaping a complete JSON string
364/// literal.
365///
366/// # Example
367///
368/// ```
369/// use json_escape::{unescape, unescape_quoted};
370///
371/// // 1. With quotes: The outer quotes are trimmed before unescaping.
372/// let unescaper = unescape_quoted(r#""hello\nworld""#);
373/// assert_eq!(unescaper, b"hello\nworld");
374///
375/// // 2. Without quotes: Behaves exactly like the standard `unescape`.
376/// let unescaper_no_quotes = unescape_quoted(r#"raw string"#);
377/// assert_eq!(unescaper_no_quotes, b"raw string");
378///
379/// // 3. Mismatched quotes: The input is passed through as-is, quotes are not trimmed.
380/// let mismatched_quotes = unescape_quoted(r#"hello""#);
381/// assert_eq!(mismatched_quotes, b"hello\"");
382///
383/// // 4. Empty quoted string: Correctly results in an empty output.
384/// let empty_quoted = unescape_quoted(r#""""#);
385/// assert_eq!(empty_quoted, b"");
386/// ```
387#[inline]
388pub fn unescape_quoted<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
389    let bytes = input.as_ref();
390    let input = if bytes.len() >= 2 && bytes[0] == b'\"' && bytes[bytes.len() - 1] == b'\"' {
391        &bytes[1..bytes.len() - 1]
392    } else {
393        bytes
394    };
395
396    unescape(input)
397}
398
399/// A streaming JSON string unescaper.
400///
401/// This struct is created by the [`unescape`] function. It implements an [`Iterator`]
402/// that yields `Result<&'a [u8], UnescapeError>`, lazily decoding the input.
403///
404/// The iterator's output chunks are one of the following:
405/// - **`Ok(&'a [u8])`**: A borrowed slice of the original input for a sequence of non-escaped bytes.
406/// - **`Ok(&'static [u8])`**: A single-byte slice for a decoded escape sequence (e.g., `\n` becomes a slice containing `0x0A`).
407///   For `\uXXXX` sequences, it yields a series of single-byte slices representing the UTF-8 encoding of the character.
408/// - **`Err(UnescapeError)`**: An error indicating an invalid escape sequence, which halts further iteration as described below.
409///
410/// Because the iterator operates on bytes, you can use helper methods like
411/// [`Unescape::decode_utf8`] or [`Unescape::decode_utf8_lossy`] to convert the
412/// final result into a string.
413///
414/// # Error Handling
415///
416/// When the iterator encounters an invalid or incomplete escape, it returns an
417/// `Err(UnescapeError)` describing the problem. The iterator then remains in an
418/// **error state**: subsequent calls to `next()` will continue to return that same
419/// error (i.e., the error is idempotent) and the iterator will not produce further
420/// `Ok` chunks. This makes the behavior deterministic for callers that check the
421/// first error and then stop.
422///
423/// Errors are classified by the precise condition encountered:
424/// - **`InvalidEscape`**: The escape sequence uses an unknown escape character (e.g., `\q`).
425/// - **`InvalidHex`**: A `\u` escape contains a non-hex character where a hex
426///   digit was expected (e.g., `\uZ`).
427/// - **`UnexpectedEof`**: The input ended before a complete escape sequence could be
428///   read. This is used when there isn't enough input yet to decide whether the
429///   sequence would be valid (for instance, an incomplete `\u` or a truncated
430///   surrogate pair).
431/// - **`LoneSurrogate`**: A complete `\uXXXX` was read, and it encodes a *high*
432///   surrogate, but the following bytes definitively do not form a valid low
433///   surrogate escape (for example, the next character is a space or any
434///   non-`\u` character).
435///
436/// The difference between `UnexpectedEof` and `LoneSurrogate` is important:
437/// - `UnexpectedEof` means **we couldn't decide** because the input ended too early.
438/// - `LoneSurrogate` means **we did decide**—we saw a full `\uXXXX` high surrogate,
439///   and the following input proves a pair will not follow.
440///
441/// #### Concrete examples
442///
443/// 1) A high surrogate followed by other data (not a `\u` low-surrogate) → `LoneSurrogate`:
444///
445/// ```rust
446/// use json_escape::{unescape, UnescapeErrorKind, LoneSurrogateError};
447///
448/// let mut iter = unescape(r"\uD83D more data");
449/// let err = iter.next().unwrap().unwrap_err();
450/// assert!(matches!(err.kind(), UnescapeErrorKind::LoneSurrogate(LoneSurrogateError { surrogate: 0xD83D, .. })));
451///
452/// // Subsequent calls return the same error (iterator remains in the same error state).
453/// let err = iter.next().unwrap().unwrap_err();
454/// assert!(matches!(err.kind(), UnescapeErrorKind::LoneSurrogate(LoneSurrogateError { surrogate: 0xD83D, .. })));
455/// ```
456///
457/// 2) An invalid escape character → `InvalidEscape`:
458///
459/// ```rust
460/// use json_escape::{unescape, UnescapeErrorKind, InvalidEscapeError};
461///
462/// let mut iter = unescape(r"\q"); // `\q` is not a defined escape
463/// let err = iter.next().unwrap().unwrap_err();
464/// assert!(matches!(err.kind(), UnescapeErrorKind::InvalidEscape(InvalidEscapeError { found: b'q', .. })));
465/// ```
466///
467/// 3) A malformed `\u` with a non-hex character → `InvalidHex`:
468///
469/// ```rust
470/// use json_escape::{unescape, UnescapeErrorKind, InvalidHexError};
471///
472/// let mut iter = unescape(r"\uZ");
473/// let err = iter.next().unwrap().unwrap_err();
474/// assert!(matches!(err.kind(), UnescapeErrorKind::InvalidHex(InvalidHexError { found: b'Z', .. })));
475/// ```
476///
477/// 4) Truncated / incomplete input ⇒ `UnexpectedEof`:
478///
479/// ```rust
480/// use json_escape::{unescape, UnescapeErrorKind};
481///
482/// // a) truncated after the first \uXXXX (no following bytes yet)
483/// let mut iter = unescape(r"\uD83D");
484/// let err = iter.next().unwrap().unwrap_err();
485/// assert!(matches!(err.kind(), UnescapeErrorKind::UnexpectedEof));
486///
487/// // b) starts a second \u but is truncated before hex digits
488/// let mut iter = unescape(r"\uD83D\u");
489/// let err = iter.next().unwrap().unwrap_err();
490/// assert!(matches!(err.kind(), UnescapeErrorKind::UnexpectedEof));
491///
492/// // c) a lone backslash at end of input
493/// let mut iter = unescape("\\");
494/// let err = iter.next().unwrap().unwrap_err();
495/// assert!(matches!(err.kind(), UnescapeErrorKind::UnexpectedEof));
496/// ```
497///
498/// **Note**: This behavior intentionally mirrors common JSON parsers (e.g.,
499/// `serde_json`, Go's `encoding/json`) for the EOF vs. semantic error distinction.
500///
501/// # Implemented Traits and Usage
502///
503/// - **`Iterator<Item = Result<&'a [u8], UnescapeError>>`**: The core trait for
504///   processing the unescaped byte chunks.
505/// - **`std::io::Read`** (requires `std` feature): Lets you use the unescaper as a
506///   standard reader, perfect for integrating with other I/O APIs.
507/// - **`TryFrom<Unescape<'a>> for Cow<'a, [u8]>`** (requires `alloc` feature): An
508///   efficient way to collect the unescaped bytes, propagating any errors.
509/// - **`Clone`**, **`Debug`**: Standard utility traits.
510/// - **`PartialEq<B: AsRef<[u8]>>`**: Compares the fully unescaped output with a byte slice.
511///
512/// ## Reading Unescaped Bytes
513///
514/// With the `std` feature, `Unescape` can be used as any other `std::io::Read`
515/// source. This is ideal for streaming and decoding large JSON string contents
516/// without buffering the entire result in memory first.
517///
518/// ```rust
519/// # #[cfg(feature = "std")] {
520/// use json_escape::unescape;
521/// use std::io::Read;
522///
523/// let mut reader = unescape(r#"chunk1\nchunk2"#);
524/// let mut buf = Vec::new();
525///
526/// // Read all unescaped bytes from the iterator into the buffer.
527/// reader.read_to_end(&mut buf).unwrap();
528///
529/// assert_eq!(buf, b"chunk1\nchunk2");
530/// # }
531/// ```
532#[derive(Clone)]
533#[must_use = "iterators are lazy and do nothing unless consumed"]
534pub struct Unescape<'a> {
535    // The inner, chunk-based iterator.
536    inner: explicit::Unescape<'a>,
537    // scratch buffer for encoded UTF-8 bytes from a \uXXXX (or surrogate pair)
538    unicode: [u8; 4],
539    // We can eliminate this by depending on the header.
540    unicode_len: u8, // how many bytes are valid in buf (0 means no pending)
541    unicode_pos: u8, // how many bytes already emitted
542}
543
544impl<'a> Unescape<'a> {
545    /// Construct from a byte slice which contains the characters inside the JSON string (no quotes).
546    fn new(input: &'a [u8]) -> Self {
547        Self {
548            inner: explicit::Unescape { bytes: input },
549            unicode: [0; 4],
550            unicode_len: 0,
551            unicode_pos: 0,
552        }
553    }
554
555    #[inline]
556    fn store_unicode(&mut self, ch: char) {
557        self.unicode_len = ch.encode_utf8(&mut self.unicode).len() as u8;
558        self.unicode_pos = 0;
559    }
560
561    #[inline]
562    fn emit_pending_byte(&mut self) -> Option<u8> {
563        if self.unicode_pos < self.unicode_len {
564            let b = self.unicode[self.unicode_pos as usize];
565            self.unicode_pos += 1;
566            Some(b)
567        } else {
568            None
569        }
570    }
571
572    /// Helper to emit the full unicode sequence and advance the internal position.
573    #[inline]
574    fn emit_unicode_as_str(&mut self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
575        // The check `unicode_pos > 0` is implicit from the call site.
576        // The buffer is guaranteed to contain a valid UTF-8 sequence.
577        let s = unsafe { str::from_utf8_unchecked(&self.unicode[..self.unicode_len as usize]) };
578        f.write_str(s)?;
579
580        // Mark the entire sequence as emitted.
581        self.unicode_pos = self.unicode_len;
582
583        Ok(())
584    }
585
586    fn _display_utf8(mut self, f: &mut fmt::Formatter<'_>, lossy: bool) -> fmt::Result {
587        // The key insight: Chunks with more than one byte are *always*
588        // borrowed from the original input, as all escaped characters
589        // are yielded byte-by-byte.
590        while let Some(result) = self.next() {
591            match result {
592                Ok(chunk) => {
593                    if chunk.is_empty() {
594                        continue;
595                    }
596
597                    // THE CORE LOGIC:
598                    // Check if the iterator just yielded the *first byte* of a *multi-byte* sequence.
599                    // - `unicode_pos == 1` means the first byte was just emitted.
600                    // - `unicode_len > 1` means it's a multi-byte char (e.g., '¢', '😎').
601                    if self.unicode_pos == 1 && self.unicode_len > 1 {
602                        // This is our special case. We have the first byte in `chunk`, but
603                        // it's more efficient to write the whole character at once from our buffer.
604                        self.emit_unicode_as_str(f)?;
605                        // The iterator will no longer yield the rest of the bytes. Since our helper
606                        // has now advanced it. But to be sure...
607                        self.unicode_pos = self.unicode_len;
608                    } else {
609                        // This is the normal case:
610                        // 1. A large chunk borrowed from the original input.
611                        // 2. A single-byte escape like `\n` or `\t`.
612                        // 3. The last byte of a multi-byte sequence (or the only byte).
613                        // In all these cases, we just need to display the chunk we received.
614                        display_bytes_utf8(chunk, f, lossy)?;
615                    }
616                }
617                Err(_) => {
618                    if lossy {
619                        break;
620                    } else {
621                        return Err(fmt::Error);
622                    }
623                }
624            }
625        }
626
627        Ok(())
628    }
629
630    /// Decodes the unescaped byte stream into a UTF-8 string.
631    ///
632    /// This method consumes the iterator and collects all resulting byte chunks.
633    /// If an unescaping error occurs, it's returned immediately. If the final
634    /// sequence of bytes is not valid UTF-8, a UTF-8 error is returned.
635    ///
636    /// Like `From<Escape>`, this is optimized to return a `Cow::Borrowed` if no
637    /// escapes were present in the input, avoiding allocation.
638    ///
639    /// **Requires the `alloc` feature.**
640    ///
641    /// # Example
642    ///
643    /// ```
644    /// # #[cfg(feature = "alloc")] {
645    /// use json_escape::unescape;
646    ///
647    /// let input = r#"Emoji: \uD83D\uDE00"#;
648    /// let cow = unescape(input).decode_utf8().unwrap();
649    ///
650    /// assert_eq!(cow, "Emoji: 😀");
651    /// # }
652    /// ```
653    #[cfg(feature = "alloc")]
654    pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
655        match self.try_into().map_err(DecodeUtf8Error::Unescape)? {
656            Cow::Borrowed(bytes) => str::from_utf8(bytes)
657                .map(Cow::Borrowed)
658                .map_err(DecodeUtf8Error::Utf8),
659            Cow::Owned(bytes) => String::from_utf8(bytes)
660                .map(Cow::Owned)
661                .map_err(|e| DecodeUtf8Error::Utf8(e.utf8_error())),
662        }
663    }
664
665    /// Decodes the unescaped byte stream lossily into a UTF-8 string.
666    ///
667    /// This is similar to [`Unescape::decode_utf8`] but replaces any invalid UTF-8 sequences
668    /// with the replacement character (U+FFFD) instead of returning an error.
669    ///
670    /// An `UnescapeError` can still be returned if the JSON escaping itself is invalid.
671    ///
672    /// **Requires the `alloc` feature.**
673    #[cfg(feature = "alloc")]
674    pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
675        Ok(decode_utf8_lossy(self.try_into()?))
676    }
677
678    /// Returns a wrapper that implements [`fmt::Display`].
679    ///
680    /// This allows an `Unescape` iterator to be used directly with formatting
681    /// macros like `println!`, `format!`, etc. It writes the unescaped content
682    /// directly to the formatter's buffer, **avoiding any heap allocations**.
683    ///
684    /// The iterator is consumed, and the resulting unescaped string is written
685    /// to the formatter. Any invalid JSON escape sequences or invalid UTF-8 will
686    /// cause a `fmt::Error`. **You should be cautious when using this method
687    /// with the `format!` macro, as a `fmt::Error` from us will cause the macro
688    /// to panic**.
689    ///
690    /// For a more robust alternative that will not panic on `UnescapeError` or
691    /// invalid bytes, consider using [`Unescape::display_utf8_lossy`] instead.
692    ///
693    /// This method is a **zero-allocation** alternative to [`Unescape::decode_utf8`],
694    /// which might allocate a `String` to return the unescaped content.
695    ///
696    /// # Example
697    ///
698    /// ```
699    /// use json_escape::unescape;
700    ///
701    /// let original = r#"Hello, \uD83C\uDF0E!"#;
702    /// let unescaper = unescape(original);
703    ///
704    /// let formatted = format!("{}", unescaper.display_utf8());
705    /// assert_eq!(formatted, "Hello, 🌎!");
706    /// ```
707    pub fn display_utf8(self) -> DisplayUnescape<'a> {
708        DisplayUnescape { inner: self }
709    }
710
711    /// Returns a wrapper that implements [`fmt::Display`] lossily.
712    ///
713    /// This method is an **allocation-free** way to write unescaped content
714    /// to a formatter. It handles invalid JSON escape sequences and invalid
715    /// UTF-8 gracefully, making it a "lossy" operation.
716    ///
717    /// - **Invalid JSON escape sequences:** Instead of causing an error, the iterator
718    ///   terminates without an error.
719    /// - **Invalid UTF-8 bytes:** These are replaced with the Unicode
720    ///   replacement character (U+FFFD).
721    ///
722    /// This method is the **zero-allocation** counterpart to [`Unescape::decode_utf8_lossy`].
723    pub fn display_utf8_lossy(self) -> DisplayUnescapeLossy<'a> {
724        DisplayUnescapeLossy { inner: self }
725    }
726}
727
728impl<'a> Iterator for Unescape<'a> {
729    type Item = Result<&'a [u8], UnescapeError>;
730
731    fn next(&mut self) -> Option<Self::Item> {
732        // If we have pending bytes, emit them first (fast).
733        if let Some(s) = self.emit_pending_byte() {
734            // s: &'static [u8] coerces to &'a [u8]
735            return Some(Ok(byte_as_static_slice(s)));
736        }
737
738        match self.inner.next() {
739            Some(Ok(chunk)) => {
740                if let Some(ch) = chunk.unescaped {
741                    self.store_unicode(ch);
742                }
743                Some(Ok(chunk.literal))
744            }
745            Some(Err(err)) => Some(Err(err)),
746            None => None,
747        }
748    }
749
750    fn size_hint(&self) -> (usize, Option<usize>) {
751        // The maximum size is the remaining length of the underlying iter + pending_unicode
752        let (lower, upper) = self.inner.size_hint();
753        let upper = upper.map(|x| x + (self.unicode_len as usize));
754        (lower, upper)
755    }
756}
757
// Marker impl only (empty body): it declares that `next` keeps returning `None`
// once it has returned `None`.
// NOTE(review): this presumably relies on the inner iterator behaving the same
// way after exhaustion — confirm against its implementation.
impl<'a> FusedIterator for Unescape<'a> {}
759
#[cfg(feature = "std")]
impl std::io::Read for Unescape<'_> {
    /// Copies unescaped bytes into `buf` until `buf` is full or the input is
    /// exhausted, and returns the number of bytes written.
    ///
    /// An error from the inner iterator is returned as an
    /// `std::io::Error` of kind `InvalidData` wrapping the original error.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        // NOTE(review): if the inner iterator reports an error after some bytes
        // were already copied during this call, `Err` is returned and the count
        // in `total_written` is discarded. Confirm this is acceptable with
        // respect to the `Read::read` contract.
        let mut total_written = 0;
        let mut remaining_buf = buf;

        // Loop until the destination buffer is full or we are completely out of data.
        loop {
            // Priority 1: Drain any pending bytes from an unescaped character first.
            if self.unicode_pos < self.unicode_len {
                let pending_unicode =
                    &self.unicode[self.unicode_pos as usize..self.unicode_len as usize];
                let bytes_to_copy = pending_unicode.len().min(remaining_buf.len());

                remaining_buf[..bytes_to_copy].copy_from_slice(&pending_unicode[..bytes_to_copy]);
                self.unicode_pos += bytes_to_copy as u8;
                total_written += bytes_to_copy;
                remaining_buf = &mut remaining_buf[bytes_to_copy..];

                // If buffer is now full, we are done for this call. Any bytes that
                // did not fit stay buffered (`unicode_pos < unicode_len`).
                if remaining_buf.is_empty() {
                    break;
                }
            }
            // Everything buffered has been handed out: reset the cursor so the
            // buffer can be reused for the next unescaped character.
            if self.unicode_pos >= self.unicode_len {
                self.unicode_pos = 0;
                self.unicode_len = 0;
            }

            // Priority 2: Get and process a new chunk from the inner iterator.
            match self.inner.next() {
                Some(Ok(chunk)) => {
                    let bytes_to_copy = chunk.literal.len().min(remaining_buf.len());
                    if bytes_to_copy > 0 {
                        remaining_buf[..bytes_to_copy]
                            .copy_from_slice(&chunk.literal[..bytes_to_copy]);
                        total_written += bytes_to_copy;
                        remaining_buf = &mut remaining_buf[bytes_to_copy..];
                    }

                    // ### THE BACKTRACKING TRICK ###
                    // This block executes if the destination `buf` was filled before we could
                    // finish reading the `literal` part of the current chunk.
                    if bytes_to_copy < chunk.literal.len() {
                        // We must reconstruct the *entire unread portion of the stream*.
                        // This includes:
                        //   1. The rest of the literal (e.g., "de").
                        //   2. The original escaped sequence (e.g., "\\n").
                        //   3. The rest of the stream that followed (e.g., "fghi").
                        //
                        // These parts are all contiguous in the original input slice.
                        // We can create a new slice view over this memory using pointer arithmetic.

                        // SAFETY: This is safe for several reasons:
                        // 1. `chunk.literal` and `self.inner.bytes` are both derived from the same
                        //    original slice with lifetime `'a`. All memory is valid.
                        // 2. `new_start_ptr` points to the start of the unread literal part, a valid memory location.
                        // 3. `stream_end_ptr` points to the end of the stream that `self.inner.bytes` currently sees.
                        // 4. The resulting slice is therefore a valid, contiguous sub-slice of the original input.
                        unsafe {
                            // Pointer to the first byte of the unread part of the literal.
                            let new_start_ptr = chunk.literal.as_ptr().add(bytes_to_copy);

                            // Pointer to one byte past the end of the remaining stream.
                            // NOTE(review): this presumably relies on the inner ("explicit")
                            // iterator never replacing `bytes` with an unrelated empty slice
                            // such as a literal `&[]`; otherwise this pointer would not belong
                            // to the original input — confirm.
                            let stream_end_ptr =
                                self.inner.bytes.as_ptr().add(self.inner.bytes.len());

                            // The new length is the distance between these two pointers.
                            let new_len = stream_end_ptr as usize - new_start_ptr as usize;

                            // Reset the inner iterator's slice to this reconstructed view.
                            self.inner.bytes = std::slice::from_raw_parts(new_start_ptr, new_len);
                        }

                        // Since the buffer is full, we must stop and return. The next `read` call
                        // will now correctly resume from the middle of the previous chunk.
                        break;
                    }

                    // If we get here, the entire literal was consumed. Now handle the unescaped char.
                    if let Some(ch) = chunk.unescaped {
                        // `unicode_pos` is already 0 here: it was reset above before
                        // the inner iterator was polled.
                        let encoded = ch.encode_utf8(&mut self.unicode);
                        self.unicode_len = encoded.len() as u8;
                        // Loop to immediately process the newly buffered unicode bytes.
                        continue;
                    }
                }
                Some(Err(e)) => {
                    return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e));
                }
                None => break, // Inner iterator is exhausted.
            }
        }

        Ok(total_written)
    }

    /// Appends all remaining unescaped bytes to `buf` and returns how many
    /// bytes were appended.
    ///
    /// This override walks the `Iterator` implementation chunk by chunk instead
    /// of going through `read`. On error, the chunks appended before the error
    /// remain in `buf`.
    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> std::io::Result<usize> {
        let start_len = buf.len();

        // Now, efficiently consume the rest of the iterator
        for result in self {
            match result {
                Ok(chunk) => buf.extend_from_slice(chunk),
                Err(err) => return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, err)),
            }
        }

        Ok(buf.len() - start_len)
    }
}
873
/// Opaque `Debug` output: prints the struct name only and marks the fields as
/// omitted via `finish_non_exhaustive`.
impl fmt::Debug for Unescape<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Unescape").finish_non_exhaustive()
    }
}
879
880impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Unescape<'_> {
881    /// Compares the unescaped output with a byte-slice-like object.
882    ///
883    /// An `Unescape` iterator is considered equal to a byte slice if it successfully
884    /// unescapes to produce a sequence of bytes identical to that slice. If the
885    /// iterator would produce an error, the comparison returns `false`.
886    ///
887    /// # Example
888    ///
889    /// ```
890    /// use json_escape::unescape;
891    ///
892    /// let unescaper = unescape(r#"hello\nworld"#);
893    /// assert_eq!(unescaper, b"hello\nworld");
894    ///
895    /// // An iterator that produces an error is not equal to any valid slice.
896    /// let failing_unescaper = unescape(r#"\k"#);
897    /// assert_ne!(failing_unescaper, b"k");
898    /// ```
899    fn eq(&self, other: &B) -> bool {
900        let mut other = other.as_ref();
901        for result in self.clone() {
902            match result {
903                Ok(chunk) => {
904                    if !other.starts_with(chunk) {
905                        return false;
906                    }
907                    other = &other[chunk.len()..];
908                }
909                Err(_) => return false, // An erroring iterator cannot be equal to a valid slice.
910            }
911        }
912        other.is_empty()
913    }
914}
915
916impl<B: AsRef<[u8]>> PartialEq<Unescape<'_>> for Result<B, UnescapeError> {
917    /// Compares the unescaper's outcome with a `Result`.
918    ///
919    /// This implementation allows for precise testing of the `Unescape` iterator
920    /// by comparing it against either a successful outcome (`Ok`) or a specific
921    /// failure (`Err`).
922    ///
923    /// - If `result` is `Ok(bytes)`, the comparison is `true` only if the iterator
924    ///   completes successfully and its concatenated output is identical to `bytes`.
925    ///
926    /// - If `result` is `Err(error)`, the comparison is `true` only if the iterator
927    ///   produces the exact same `UnescapeError`.
928    ///
929    /// # Example
930    ///
931    /// ```
932    /// use json_escape::{unescape, UnescapeError, InvalidEscapeError};
933    ///
934    /// // --- Success Case ---
935    /// let unescaper = unescape(r#"hello\tworld"#);
936    /// // The comparison is against an `Ok` variant.
937    /// assert_eq!(Ok("hello\tworld"), unescaper);
938    ///
939    /// // --- Error Case ---
940    /// let failing_unescaper = unescape(r#"invalid-\u"#);
941    /// // We can assert that the iterator produces a specific error.
942    /// # let unexpected_eof = unescape(r"\u").next().unwrap().unwrap_err();
943    /// assert_eq!(Err::<&str, _>(unexpected_eof), failing_unescaper);
944    /// ```
945    fn eq(&self, unescape: &Unescape<'_>) -> bool {
946        match self {
947            Ok(expected_bytes) => unescape == expected_bytes,
948            Err(expected_error) => {
949                for result in unescape.clone() {
950                    if let Err(actual_error) = result {
951                        // The iterator's first error is its final outcome.
952                        // It must match the expected error exactly.
953                        return actual_error == *expected_error;
954                    }
955                }
956                // `unescape` completed successfully, but an error was expected.
957                false
958            }
959        }
960    }
961}
962
963impl<'a, 'b> PartialEq<Unescape<'a>> for Unescape<'b> {
964    /// Compares two `Unescape` iterators for equality based on their terminal result.
965    ///
966    /// The equality of two `Unescape` iterators is determined by the final `Result`
967    /// that would be obtained if each iterator were fully consumed (e.g., by using `try_collect()`).
968    ///
969    /// The specific rules are as follows:
970    ///
971    /// 1.  **Error vs. Error**: If both iterators terminate with an `Err`, they are
972    ///     considered **equal** if and only if their `UnescapeError`s are identical.
973    ///     Any bytes successfully unescaped *before* the error are ignored in this case.
974    /// 2.  **Success vs. Success**: If both iterators terminate with `Ok`, they are
975    ///     considered **equal** if and only if the complete sequence of unescaped bytes
976    ///     is identical for both.
977    /// 3.  **Success vs. Error**: If one iterator terminates with `Ok` and the other
978    ///     with `Err`, they are always **not equal**.
979    ///
980    /// # Example
981    ///
982    /// ```
983    /// use json_escape::unescape;
984    ///
985    /// // Case 1: Both iterators produce the same error. They are equal,
986    /// // even though their valid prefixes ("a" and "b") are different.
987    /// let failing_a = unescape(r#"a\k"#);
988    /// let failing_b = unescape(r#"b\k"#);
989    /// assert_eq!(failing_a, failing_b);
990    ///
991    /// // Case 2: Both iterators succeed. Equality depends on the byte stream.
992    /// let successful_a = unescape(r#"hello\nworld"#);
993    /// let successful_b = unescape(r#"hello\nworld"#);
994    /// assert_eq!(successful_a, successful_b);
995    ///
996    /// let successful_c = unescape(r#"different"#);
997    /// assert_ne!(successful_a, successful_c);
998    ///
999    /// // Case 3: One succeeds and one fails. They are not equal.
1000    /// let succeeding = unescape(r#"stop"#);
1001    /// let failing = unescape(r#"stop\k"#);
1002    /// assert_ne!(succeeding, failing);
1003    ///
1004    /// // Case 4: Both iterators fail differently. They are not equal.
1005    /// let failing_a = unescape(r#"data:\k"#);
1006    /// let failing_b = unescape(r#"data:\"#);
1007    /// assert_ne!(failing_a, failing_b);
1008    /// ```
1009    fn eq(&self, other: &Unescape<'a>) -> bool {
1010        // Fast path: if they are views into the same underlying data with the same state.
1011        ((self.inner.bytes == other.inner.bytes)
1012            && (self.unicode == other.unicode)
1013            && (self.unicode_len == other.unicode_len)
1014            && (self.unicode_pos == other.unicode_pos))
1015            || {
1016                let mut a_error = None;
1017                let mut b_error = None;
1018
1019                let mut a = self.clone().map_while(|result| match result {
1020                    Ok(ok) => Some(ok),
1021                    Err(err) => {
1022                        a_error = Some(err);
1023                        None
1024                    }
1025                });
1026
1027                let mut b = other.clone().map_while(|result| match result {
1028                    Ok(ok) => Some(ok),
1029                    Err(err) => {
1030                        b_error = Some(err);
1031                        None
1032                    }
1033                });
1034
1035                let streams_match = chunks_eq(&mut a, &mut b);
1036
1037                // Drain the iterators to ensure the error state is captured,
1038                // especially if chunks_eq returned false early.
1039                // (e.g unescape("a\k") and unescape("b\k") which are actually
1040                // equal)
1041                a.for_each(|_| {});
1042                b.for_each(|_| {});
1043
1044                match (a_error, b_error) {
1045                    // Both errored: equality depends only on the errors being the same.
1046                    (Some(a_err), Some(b_err)) => a_err == b_err,
1047                    // Both succeeded: equality depends on the byte streams having been identical.
1048                    (None, None) => streams_match,
1049                    // One errored and the other didn't: they are not equal.
1050                    _ => false,
1051                }
1052            }
1053    }
1054}
1055
#[cfg(feature = "alloc")]
impl<'a> TryFrom<Unescape<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Collects the unescaped bytes into a `Cow<'a, [u8]>`, borrowing when possible.
    ///
    /// The result is `Cow::Borrowed` when the iterator yields at most one
    /// chunk, so no allocation takes place. As soon as a second chunk is
    /// produced, all chunks are copied into a `Vec` and `Cow::Owned` is returned.
    ///
    /// The first `UnescapeError` encountered aborts the conversion and is
    /// returned to the caller.
    ///
    /// **Requires the `alloc` feature.**
    fn try_from(mut value: Unescape<'a>) -> Result<Self, Self::Error> {
        // No chunk at all: the output is empty.
        let Some(first) = value.next().transpose()? else {
            return Ok(Cow::Borrowed(b""));
        };
        // Exactly one chunk: hand it out without copying.
        let Some(second) = value.next().transpose()? else {
            return Ok(Cow::Borrowed(first));
        };

        // Two or more chunks: copy. The capacity estimate is the two chunks
        // already in hand plus the input bytes that have not been parsed yet.
        let capacity = first.len() + second.len() + value.inner.bytes.len();
        let mut buf = Vec::with_capacity(capacity);
        buf.extend_from_slice(first);
        buf.extend_from_slice(second);
        for item in value {
            buf.extend_from_slice(item?);
        }
        Ok(Cow::Owned(buf))
    }
}
1090
1091// =============================================================================
1092// DisplayUnescape Implementation
1093// =============================================================================
1094
/// A wrapper for an [`Unescape`] iterator that implements [`fmt::Display`].
///
/// This struct is created by the [`Unescape::display_utf8()`] method. It allows for
/// printing the unescaped content directly to a formatter, which **avoids
/// any heap allocations**. The unescaping and UTF-8 decoding are performed on-the-fly as the
/// `fmt` method is called.
pub struct DisplayUnescape<'a> {
    // The wrapped iterator; it is cloned for every `fmt` call.
    inner: Unescape<'a>,
}
1104
impl fmt::Display for DisplayUnescape<'_> {
    /// Writes the unescaped content to `f` in strict (non-lossy) mode.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `fmt` only gets `&self`, so a clone of the iterator is consumed;
        // `false` selects the non-lossy mode.
        self.inner.clone()._display_utf8(f, false)
    }
}
1110
1111/// A wrapper for an [`Unescape`] iterator that implements [`fmt::Display`] lossily.
1112///
1113/// This struct is created by the [`Unescape::display_utf8_lossy()`] method. Like
1114/// `DisplayUnescape`, it performs its operation **without any heap allocations**.
1115///
1116/// This method differs from `display_utf8` in that it handles two types of
1117/// errors gracefully:
1118/// - Invalid JSON escape sequences will be ignored, and the iterator will
1119///   continue to completion without a `fmt::Error`.
1120/// - Invalid UTF-8 byte sequences will be replaced with the Unicode
1121///   replacement character (``, U+FFFD)
1122pub struct DisplayUnescapeLossy<'a> {
1123    inner: Unescape<'a>,
1124}
1125
impl fmt::Display for DisplayUnescapeLossy<'_> {
    /// Writes the unescaped content to `f` in lossy mode.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Lossy mode: replace invalid sequences with U+FFFD and continue.
        // `fmt` only gets `&self`, so a clone of the iterator is consumed;
        // `true` selects the lossy mode.
        self.inner.clone()._display_utf8(f, true)
    }
}
1132
1133// =============================================================================
1134// Error Types
1135// =============================================================================
1136
/// An error that can occur when decoding the final byte stream to a UTF-8 string.
///
/// Each variant wraps the underlying error, which is also what the `Display`
/// and `Error::source` implementations of this type delegate to.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
pub enum DecodeUtf8Error {
    /// The unescaped byte sequence was not valid UTF-8.
    Utf8(str::Utf8Error),
    /// An error occurred during the JSON unescaping process itself.
    Unescape(UnescapeError),
}
1145
1146impl fmt::Display for DecodeUtf8Error {
1147    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1148        match self {
1149            DecodeUtf8Error::Utf8(e) => fmt::Display::fmt(e, f),
1150            DecodeUtf8Error::Unescape(e) => fmt::Display::fmt(e, f),
1151        }
1152    }
1153}
1154
/// Details of an invalid escape sequence error.
///
/// Carried by [`UnescapeErrorKind::InvalidEscape`].
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[non_exhaustive]
pub struct InvalidEscapeError {
    /// The invalid character found after a `\`.
    ///
    /// This is the raw byte; `UnescapeError`'s `Display` output prints it in
    /// two-digit uppercase hexadecimal.
    pub found: u8,
}
1162
/// Details of a lone UTF-16 surrogate error.
///
/// Carried by [`UnescapeErrorKind::LoneSurrogate`].
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[non_exhaustive]
pub struct LoneSurrogateError {
    /// The 16-bit surrogate code point.
    ///
    /// `UnescapeError`'s `Display` output prints it in four-digit uppercase
    /// hexadecimal.
    pub surrogate: u16,
}
1170
/// Describes a non-hex byte encountered inside a `\uXXXX` escape.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[non_exhaustive]
pub struct InvalidHexError {
    /// The offending byte that is not a hexadecimal digit.
    pub found: u8,
}

impl fmt::Display for InvalidHexError {
    /// Renders the offending byte as two uppercase hex digits, e.g. `'0x5A'`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { found } = *self;
        f.write_fmt(format_args!("found invalid hex digit '0x{:02X}'", found))
    }
}
1184
/// An error that can occur during the JSON string unescaping process.
///
/// The fields are crate-private; use [`UnescapeError::kind`] and
/// [`UnescapeError::offset`] to inspect them.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
pub struct UnescapeError {
    /// The specific kind of unescaping error.
    pub(crate) kind: UnescapeErrorKind,
    /// The byte offset from the start of the escape sequence (`\`) where the
    /// error was detected.
    ///
    /// This is guaranteed to be less than 12, as the maximum escape sequence
    /// is `\uXXXX\uXXXX`.
    pub(crate) offset: u8,
}
1197
impl UnescapeError {
    /// Returns the specific kind of error that occurred.
    ///
    /// This can be used to programmatically handle different error types,
    /// such as distinguishing between a malformed hex sequence and an
    /// invalid escape character.
    ///
    /// The kind is returned by value (`UnescapeErrorKind` is `Copy`).
    ///
    /// ### Example
    ///
    /// ```
    /// # use json_escape::{unescape, UnescapeErrorKind, InvalidHexError};
    /// let mut unescaper = unescape(r#"\u123Z"#);
    /// let err = unescaper.next().unwrap().unwrap_err();
    ///
    /// match err.kind() {
    ///     UnescapeErrorKind::InvalidHex(InvalidHexError { found, .. }) => {
    ///         // We can inspect the exact invalid character found.
    ///         assert_eq!(found, b'Z');
    ///     }
    ///     _ => panic!("Expected an InvalidHex error"),
    /// }
    /// ```
    pub fn kind(&self) -> UnescapeErrorKind {
        self.kind
    }

    /// Returns the byte offset from the start of the escape sequence (`\`)
    /// where the error was detected.
    ///
    /// - For `\x`, the offset is `1` (pointing to `x`).
    /// - For `\u123?`, the offset is `5` (pointing to `?`).
    /// - For a lone surrogate `\uD800`, the offset is `6` (pointing after the sequence).
    ///
    /// The offset is relative to the backslash that starts the escape, not to
    /// the beginning of the input.
    ///
    /// This is useful for providing detailed error messages that can point
    /// to the exact location of the problem in the source string.
    ///
    /// ### Example
    ///
    /// ```
    /// # use json_escape::unescape;
    /// let json_string_content = r#"bad escape \x here"#;
    /// let mut unescaper = unescape(json_string_content);
    ///
    /// // previous read
    /// // { ... }
    ///
    /// let err = unescaper.next().unwrap().unwrap_err();
    ///
    /// // The error occurred at the 'x', which is 1 byte after the '\'
    /// assert_eq!(err.offset(), 1);
    ///
    /// // You could use this to highlight the error in the original input
    /// let backslash_pos = json_string_content.find('\\').unwrap();
    /// let error_pos = backslash_pos + err.offset() as usize;
    /// assert_eq!(json_string_content.as_bytes()[error_pos], b'x');
    ///
    /// // The generated error message also includes this info.
    /// let expected_msg = "invalid escape: '\\0x78' at offset 1";
    /// assert_eq!(err.to_string(), expected_msg);
    /// ```
    pub fn offset(&self) -> u8 {
        self.offset
    }
}
1262
/// The specific kind of error that can occur during JSON string unescaping.
///
/// This enum covers all possible failures described by the JSON standard for string contents.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[non_exhaustive]
pub enum UnescapeErrorKind {
    /// Found a backslash followed by an unexpected character (e.g., `\x`).
    InvalidEscape(InvalidEscapeError),
    /// Found `\u` but the following characters were not 4 valid hex digits.
    InvalidHex(InvalidHexError),
    /// Input ended unexpectedly while parsing an escape sequence (e.g., `\u12`).
    UnexpectedEof,
    /// The `\u` sequence yielded a lone high or low surrogate without a matching pair.
    LoneSurrogate(LoneSurrogateError),
}
1278
1279impl fmt::Display for UnescapeError {
1280    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1281        match self.kind {
1282            UnescapeErrorKind::InvalidEscape(e) => {
1283                write!(
1284                    f,
1285                    "invalid escape: '\\0x{:02X}' at offset {}",
1286                    e.found, self.offset
1287                )
1288            }
1289            UnescapeErrorKind::InvalidHex(ref s) => {
1290                write!(f, "{} at offset {}", s, self.offset)
1291            }
1292            UnescapeErrorKind::UnexpectedEof => {
1293                write!(
1294                    f,
1295                    "unexpected end of input while parsing escape sequence, expected character at offset {}",
1296                    self.offset
1297                )
1298            }
1299            UnescapeErrorKind::LoneSurrogate(e) => write!(
1300                f,
1301                "invalid unicode sequence: lone surrogate found: 0x{:04X} at offset {}",
1302                e.surrogate, self.offset
1303            ),
1304        }
1305    }
1306}
1307
// The impl body is empty, so all trait methods (including `source`) use the
// trait's default implementations.
impl core::error::Error for UnescapeError {}
1309impl core::error::Error for DecodeUtf8Error {
1310    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
1311        match self {
1312            DecodeUtf8Error::Utf8(e) => Some(e),
1313            DecodeUtf8Error::Unescape(e) => Some(e),
1314        }
1315    }
1316}
1317
1318// =============================================================================
1319// Utilities
1320// =============================================================================
1321
// Compile-time lookup table from a byte to its JSON escape sequence.
// `Some(escape)` holds the `&'static str` to emit for that byte;
// `None` means the byte needs no escaping.
const ESCAPE_TABLE: [Option<&'static str>; 256] = {
    // Escapes for the control characters 0x00..=0x1F, indexed by byte value.
    // Five of them have a short form (`\b`, `\t`, `\n`, `\f`, `\r`); the
    // others are spelled out as `\uXXXX` with lowercase hex digits.
    const CONTROL_ESCAPES: [&str; 0x20] = [
        r#"\u0000"#, r#"\u0001"#, r#"\u0002"#, r#"\u0003"#, // 0x00..=0x03
        r#"\u0004"#, r#"\u0005"#, r#"\u0006"#, r#"\u0007"#, // 0x04..=0x07
        r#"\b"#, r#"\t"#, r#"\n"#, r#"\u000b"#, // 0x08..=0x0B
        r#"\f"#, r#"\r"#, r#"\u000e"#, r#"\u000f"#, // 0x0C..=0x0F
        r#"\u0010"#, r#"\u0011"#, r#"\u0012"#, r#"\u0013"#, // 0x10..=0x13
        r#"\u0014"#, r#"\u0015"#, r#"\u0016"#, r#"\u0017"#, // 0x14..=0x17
        r#"\u0018"#, r#"\u0019"#, r#"\u001a"#, r#"\u001b"#, // 0x18..=0x1B
        r#"\u001c"#, r#"\u001d"#, r#"\u001e"#, r#"\u001f"#, // 0x1C..=0x1F
    ];

    let mut table: [Option<&'static str>; 256] = [None; 256];

    // `for` loops are not available in const evaluation, hence the `while`.
    let mut byte = 0;
    while byte < 0x20 {
        table[byte] = Some(CONTROL_ESCAPES[byte]);
        byte += 1;
    }

    // The two printable characters that must always be escaped.
    table[b'"' as usize] = Some(r#"\""#);
    table[b'\\' as usize] = Some(r#"\\"#);

    table
};
1372
1373// A simple boolean-like lookup table for SIMD.
1374// 0 = no escape needed, 1 = escape needed.
1375// This is very compact (256 bytes) and fits easily in the L1 cache.
1376#[allow(unused)]
1377const ESCAPE_DECISION_TABLE: [u8; 256] = {
1378    let mut table = [0u8; 256];
1379    let mut i = 0;
1380    while i < 256 {
1381        if ESCAPE_TABLE[i].is_some() {
1382            table[i] = 1;
1383        }
1384        i += 1;
1385    }
1386    table
1387};
1388
// Portable-SIMD version, compiled only when the "simd" feature is enabled and
// the `nightly` cfg is set.
//
// Returns the index of the first byte in `bytes` that needs escaping (a byte
// below 0x20, `"` or `\`), or `None` if there is no such byte.
//
// The SIMD types come from `core::simd` rather than the `std` re-export so
// that this function does not depend on the `std` crate.
#[cfg(all(feature = "simd", nightly))]
#[inline]
fn find_escape_char(bytes: &[u8]) -> Option<usize> {
    use core::simd::{Simd, prelude::SimdPartialEq, prelude::SimdPartialOrd};

    const LANES: usize = 16; // Process 16 bytes at a time.

    // Comparison vectors: each is the same byte repeated in every lane. They do
    // not depend on the input, so they are built once, outside the loop.
    let last_control_v = Simd::<u8, LANES>::splat(b' ' - 1); // For the <= 0x1F check
    let quote_v = Simd::<u8, LANES>::splat(b'"');
    let slash_v = Simd::<u8, LANES>::splat(b'\\');

    let mut i = 0;

    // SIMD main loop: only runs while a full `LANES`-byte chunk is available,
    // which is what `from_slice` requires.
    while i + LANES <= bytes.len() {
        // Load 16 bytes from the slice into a SIMD vector.
        let chunk = Simd::<u8, LANES>::from_slice(&bytes[i..]);

        // A byte needs escaping if ANY of the three conditions holds.
        let combined_mask =
            chunk.simd_le(last_control_v) | chunk.simd_eq(quote_v) | chunk.simd_eq(slash_v);

        if combined_mask.any() {
            // The lowest set bit of the bitmask is the first matching lane.
            let first_match_index = combined_mask.to_bitmask().trailing_zeros() as usize;
            return Some(i + first_match_index);
        }

        i += LANES;
    }

    // Fewer than `LANES` bytes remain (possibly none): scan them with the
    // scalar lookup table. `i <= bytes.len()` always holds here.
    bytes[i..]
        .iter()
        .position(|&b| ESCAPE_DECISION_TABLE[b as usize] != 0)
        .map(|pos| i + pos)
}
1439
// Stable-Rust SIMD path using explicit SSE2 intrinsics. It is compiled only for
// x86_64 targets with the "simd" feature when the `nightly` cfg is not set.
//
// Returns the index of the first byte in `bytes` that needs escaping (a byte
// below 0x20, `"` or `\`), or `None` if there is no such byte.
//
// The intrinsics come from `core::arch` rather than the `std` re-export so
// that this function does not depend on the `std` crate.
#[cfg(all(feature = "simd", not(nightly), target_arch = "x86_64"))]
#[inline]
fn find_escape_char(bytes: &[u8]) -> Option<usize> {
    use core::arch::x86_64::*;

    const LANES: usize = 16; // SSE2 works on 128-bit registers, which is 16 bytes.

    /// Scans the 16 bytes starting at `bytes[i]` and returns the absolute index
    /// of the first byte that needs escaping, if any.
    ///
    /// # Safety
    ///
    /// The caller must guarantee `i + LANES <= bytes.len()`: the function
    /// performs an unchecked 16-byte load starting at offset `i`.
    #[target_feature(enable = "sse2")]
    unsafe fn find_in_chunk(bytes: &[u8], i: usize) -> Option<usize> {
        debug_assert!(i + LANES <= bytes.len());

        // Load 16 bytes of data from the slice.
        // SAFETY: per this function's contract, `bytes[i..i + 16]` is in
        // bounds; `_mm_loadu_si128` has no alignment requirement.
        let chunk = unsafe { _mm_loadu_si128(bytes.as_ptr().add(i) as *const _) };

        // Create comparison vectors for quote and slash.
        let quote_v = _mm_set1_epi8(b'"' as i8);
        let slash_v = _mm_set1_epi8(b'\\' as i8);

        // Emulate unsigned comparison for control characters
        // Create a vector with the value 0x80 in each lane.
        let bias = _mm_set1_epi8(0x80u8 as i8);
        // Create the comparison vector for bytes < 0x20 (' ').
        let space_v = _mm_set1_epi8(b' ' as i8);

        // Bias both the input chunk and the comparison vector by XORing with 0x80.
        let biased_chunk = _mm_xor_si128(chunk, bias);
        let biased_space_v = _mm_xor_si128(space_v, bias);

        // Now, a signed less-than comparison on the biased values gives the
        // same result as an unsigned less-than on the original values.
        let lt_space_mask = _mm_cmplt_epi8(biased_chunk, biased_space_v);

        // Perform the equality comparisons (these are unaffected by signedness).
        let eq_quote_mask = _mm_cmpeq_epi8(chunk, quote_v);
        let eq_slash_mask = _mm_cmpeq_epi8(chunk, slash_v);

        // Combine the results.
        let combined_mask = _mm_or_si128(lt_space_mask, _mm_or_si128(eq_quote_mask, eq_slash_mask));

        // One bit per lane; the lowest set bit is the first match.
        let mask = _mm_movemask_epi8(combined_mask);

        if mask != 0 {
            Some(i + mask.trailing_zeros() as usize)
        } else {
            None
        }
    }

    let mut i = 0;

    // Main loop: only runs while a full `LANES`-byte chunk is available.
    while i + LANES <= bytes.len() {
        // SAFETY: the loop condition guarantees `i + LANES <= bytes.len()`, as
        // `find_in_chunk` requires. SSE2 is part of the x86_64 architecture and
        // this function is only compiled for `target_arch = "x86_64"`.
        if let Some(result) = unsafe { find_in_chunk(bytes, i) } {
            return Some(result);
        }
        i += LANES;
    }

    // Fewer than `LANES` bytes remain (possibly none): scan them with the
    // scalar lookup table. `i <= bytes.len()` always holds here.
    bytes[i..]
        .iter()
        .position(|&b| ESCAPE_DECISION_TABLE[b as usize] != 0)
        .map(|pos| i + pos)
}
1511
1512// A fallback for when SIMD feature is off.
1513#[cfg(not(feature = "simd"))]
1514#[inline]
1515fn find_escape_char(bytes: &[u8]) -> Option<usize> {
1516    bytes
1517        .iter()
1518        .position(|&b| ESCAPE_DECISION_TABLE[b as usize] != 0)
1519}
1520
1521#[cfg(all(feature = "simd", not(nightly), not(target_arch = "x86_64")))]
1522compile_error! { "simd requires nightly or target_arch = \"x86_64\"" }
1523
/// Static table mapping every `u8` value to a one-element array holding it.
///
/// Entry `i` is `[i as u8]`, which lets [`byte_as_static_slice`] hand out a
/// `'static` slice for any single byte without allocating.
///
/// This is a `static` rather than a `const` on purpose: a `static` has one
/// fixed address for the whole program, so a borrow of one of its elements is
/// a genuine `&'static` reference. A `const` is re-instantiated at every use
/// site, so borrowing an element at a runtime index would borrow a temporary.
static U8_TABLE: [[u8; 1]; 256] = {
    let mut arr = [[0u8; 1]; 256];
    let mut i = 0usize;
    // `for` loops are not available in const evaluation, hence the `while`.
    while i < 256 {
        arr[i] = [i as u8];
        i += 1;
    }
    arr
};

/// Returns a `'static` one-byte slice whose only element is `b`.
///
/// The slice borrows from [`U8_TABLE`]; the `&'static [u8; 1]` element
/// reference is coerced to `&'static [u8]` on return.
#[inline(always)]
fn byte_as_static_slice(b: u8) -> &'static [u8] {
    &U8_TABLE[usize::from(b)]
}
1541
1542// The following function is copied from the `percent-encoding` crate, version 2.3.2.
1543// Source: https://github.com/servo/rust-url/blob/22b925f93ad505a830f1089538a9ed6f5fd90612/percent_encoding/src/lib.rs#L337-L365
1544//
1545// It is licensed under the same terms as the `percent-encoding` crate (MIT/Apache-2.0).
1546//
1547// This helper is used to efficiently convert a Cow<'_, [u8]> to a Cow<'_, str>
1548// lossily, with a specific optimization to avoid a re-allocation when the input
1549// is an owned, valid UTF-8 Vec<u8>.
#[cfg(feature = "alloc")]
#[allow(ambiguous_wide_pointer_comparisons)]
fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Borrowed input needs no special handling: the standard lossy conversion
    // already borrows when the bytes are valid UTF-8.
    let owned = match input {
        Cow::Borrowed(slice) => return String::from_utf8_lossy(slice),
        Cow::Owned(vec) => vec,
    };

    let decoded = match String::from_utf8_lossy(&owned) {
        // Invalid sequences were found, so a repaired copy was allocated;
        // that copy is the result.
        Cow::Owned(repaired) => repaired,
        // `from_utf8_lossy` only hands back a borrow when it had nothing to
        // replace, i.e. when every byte of `owned` is valid UTF-8.
        Cow::Borrowed(valid) => {
            // Sanity check of the reasoning above: the borrow must be the
            // very buffer we own (same address and length).
            debug_assert!(core::ptr::eq::<[u8]>(valid.as_bytes(), owned.as_slice()));

            // SAFETY: the branch we are in proves `owned` is valid UTF-8, so
            // the buffer can be reinterpreted as a `String` without another
            // validation pass or a new allocation.
            unsafe { String::from_utf8_unchecked(owned) }
        }
    };

    Cow::Owned(decoded)
}
1579
/// Compare two chunk-iterators by their concatenated byte stream (streaming,
/// zero allocations).
///
/// Returns `true` exactly when the bytes produced by concatenating every
/// chunk of `a` equal the bytes produced by concatenating every chunk of `b`.
/// Where the chunk boundaries fall is irrelevant, and empty chunks (which
/// contribute no bytes) are ignored wherever they appear, including at the
/// very end of either iterator.
///
/// This is allocation-free: it streams through both iterators, comparing
/// overlapping prefixes and carrying the remainder of the longer chunk
/// forward into the next round.
fn chunks_eq<'a, I1, A, I2, B>(mut a: I1, mut b: I2) -> bool
where
    A: 'a + AsRef<[u8]> + ?Sized,
    B: 'a + AsRef<[u8]> + ?Sized,
    I1: Iterator<Item = &'a A>,
    I2: Iterator<Item = &'a B>,
{
    // Not-yet-compared tail of the current chunk from each side.
    let mut a_rem: &[u8] = &[];
    let mut b_rem: &[u8] = &[];

    loop {
        // Refill `a_rem` with the next chunk that actually contains bytes.
        // Looping (instead of a single `next()`) is what skips empty chunks.
        while a_rem.is_empty() {
            match a.next() {
                Some(chunk) => a_rem = chunk.as_ref(),
                None => {
                    // `a` has no bytes left. The streams are equal only if
                    // `b` has none either: nothing carried over, and every
                    // chunk it still yields is empty.
                    return b_rem.is_empty()
                        && b.all(|chunk| {
                            let rest: &[u8] = chunk.as_ref();
                            rest.is_empty()
                        });
                }
            }
        }

        // Same for `b`. Reaching the end here means `b` ran out of bytes
        // while `a_rem` is known to be non-empty, so the streams differ.
        while b_rem.is_empty() {
            match b.next() {
                Some(chunk) => b_rem = chunk.as_ref(),
                None => return false,
            }
        }

        // Both remainders are non-empty now; compare as much as both have.
        let n = a_rem.len().min(b_rem.len());
        let (a_head, a_tail) = a_rem.split_at(n);
        let (b_head, b_tail) = b_rem.split_at(n);
        if a_head != b_head {
            return false;
        }

        // Carry the unconsumed tails into the next round.
        a_rem = a_tail;
        b_rem = b_tail;
    }
}
1630
/// Writes `bytes` to `f` as text, one UTF-8 chunk at a time.
///
/// Each run of valid UTF-8 is written unchanged. When a run is followed by
/// invalid bytes, the outcome depends on `lossy`:
/// - `lossy == true`: a single U+FFFD replacement character is written for
///   that invalid sequence and processing continues;
/// - `lossy == false`: `fmt::Error` is returned. The valid text preceding the
///   invalid bytes has already been written at that point.
#[inline]
fn display_bytes_utf8(bytes: &[u8], f: &mut fmt::Formatter<'_>, lossy: bool) -> fmt::Result {
    bytes.utf8_chunks().try_for_each(|piece| {
        f.write_str(piece.valid())?;

        match (piece.invalid().is_empty(), lossy) {
            // Nothing invalid after this run: just move on.
            (true, _) => Ok(()),
            // Invalid bytes, lossy mode: substitute and keep going.
            (false, true) => f.write_char(char::REPLACEMENT_CHARACTER),
            // Invalid bytes, strict mode: abort formatting.
            (false, false) => Err(fmt::Error),
        }
    })
}
1647
1648#[cfg(test)]
1649mod tests {
1650    use core::fmt::Display;
1651    use std::{io::Read as _, string::ToString as _, vec};
1652
1653    use super::*;
1654
1655    // ===================== Escape ===================== //
1656
1657    fn test_escape_typical(input: &str, want: &str) {
1658        let got = escape_str(input).collect::<String>();
1659        assert_eq!(got, want);
1660
1661        // Test PartialEq too
1662        assert_eq!(escape_str(input), want);
1663
1664        // Let's test explicit regardless
1665        let got = explicit::escape_str(input).collect::<String>();
1666        assert_eq!(got, want);
1667
1668        // Test PartialEq too
1669        assert_eq!(escape_str(input), want)
1670    }
1671
1672    #[test]
1673    fn test_empty_string() {
1674        test_escape_typical("", "");
1675    }
1676
1677    #[test]
1678    fn test_quotes() {
1679        test_escape_typical("\"hello\"", "\\\"hello\\\"")
1680    }
1681
1682    #[test]
1683    fn test_backslash() {
1684        test_escape_typical("\\hello\\", "\\\\hello\\\\");
1685    }
1686
1687    #[test]
1688    fn test_slash() {
1689        test_escape_typical("/hello/", "/hello/");
1690    }
1691
1692    #[test]
1693    fn test_control_chars() {
1694        test_escape_typical("\n\r\t\x08\x0C", "\\n\\r\\t\\b\\f");
1695    }
1696
1697    #[test]
1698    fn test_escape_fully() {
1699        let input = "Hello, \"world\"!\nThis contains a \\ backslash and a \t tab.";
1700        let expected = r#"Hello, \"world\"!\nThis contains a \\ backslash and a \t tab."#;
1701        test_escape_typical(input, expected);
1702    }
1703
1704    #[test]
1705    fn test_other_control_chars() {
1706        let input = "Null:\0, Bell:\x07";
1707        let expected = r#"Null:\u0000, Bell:\u0007"#;
1708        test_escape_typical(input, expected);
1709
1710        test_escape_typical("\x00\x1F", "\\u0000\\u001f");
1711        test_escape_typical("\x19", "\\u0019");
1712    }
1713
1714    #[test]
1715    fn test_iterator_chunks() {
1716        let input = "prefix\npostfix";
1717        let mut iter = escape_str(input);
1718        assert_eq!(iter.next(), Some("prefix"));
1719        assert_eq!(iter.next(), Some(r#"\n"#));
1720        assert_eq!(iter.next(), Some("postfix"));
1721        assert_eq!(iter.next(), None);
1722    }
1723
1724    #[test]
1725    fn test_no_escape_needed() {
1726        let input = "A simple string with no escapes.";
1727        let mut iter = escape_str(input);
1728        assert_eq!(iter.next(), Some("A simple string with no escapes."));
1729        assert_eq!(iter.next(), None);
1730
1731        let input = "café";
1732        let mut iter = escape_str(input);
1733        assert_eq!(iter.next(), Some("café"));
1734        assert_eq!(iter.next(), None);
1735
1736        let input = "❤️";
1737        let mut iter = escape_str(input);
1738        assert_eq!(iter.next(), Some("❤️"));
1739        assert_eq!(iter.next(), None);
1740    }
1741
1742    // ===================== Unescape ===================== //
1743
1744    #[test]
1745    fn test_byte_table() {
1746        assert_eq!(byte_as_static_slice(0), &[0]);
1747        assert_eq!(byte_as_static_slice(5), &[5]);
1748        assert_eq!(byte_as_static_slice(255), &[255]);
1749    }
1750
1751    fn test_unescape_typical<I: AsRef<[u8]> + ?Sized>(input: &I, want: &str) {
1752        let got = unescape(input).decode_utf8().unwrap();
1753        assert_eq!(got, want);
1754
1755        // Test PartialEq too
1756        assert_eq!(unescape(input), want);
1757
1758        // Help display
1759        assert_display(unescape(input).display_utf8(), Ok(want));
1760
1761        // Let's test explicit regardless
1762        let got = explicit::unescape(input).decode_utf8().unwrap();
1763        assert_eq!(got, want);
1764
1765        // Test PartialEq too
1766        assert_eq!(explicit::unescape(input), want);
1767
1768        // Help display
1769        assert_display(explicit::unescape(input).display_utf8(), Ok(want));
1770    }
1771
1772    #[test]
1773    fn test_unicode_escape_basic_unescape() {
1774        // \u4E16 => 世 (E4 B8 96)
1775        let s = "X\\u4E16Y";
1776        test_unescape_typical(s, "X世Y");
1777
1778        let s = "Snow: \\u2603"; // \u2603 => ☃
1779        test_unescape_typical(s, "Snow: ☃");
1780
1781        let s = "A \\u03A9 B"; // Ω is U+03A9
1782        test_unescape_typical(s, "A Ω B");
1783    }
1784
1785    #[test]
1786    fn test_surrogate_pair_unescape() {
1787        // 😀 is U+1F600 -> in JSON: \uD83D\uDE00
1788        let s = "A\\uD83D\\uDE00B";
1789        test_unescape_typical(s, "A😀B")
1790    }
1791
1792    #[test]
1793    fn test_invalid_escape_unescape() {
1794        let s = b"\\x";
1795        let mut u = unescape(s);
1796
1797        match u.next() {
1798            Some(Err(UnescapeError {
1799                kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError { found: b'x' }),
1800                offset: 1,
1801            })) => {}
1802            _ => panic!("expected invalid escape"),
1803        }
1804
1805        // Let's test explicit regardless
1806        let mut u = explicit::unescape(s);
1807
1808        match u.next() {
1809            Some(Err(UnescapeError {
1810                kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError { found: b'x' }),
1811                offset: 1,
1812            })) => {}
1813            _ => panic!("expected invalid escape"),
1814        }
1815    }
1816
1817    #[test]
1818    fn test_simple_unescape() {
1819        let input = "Hello\\nWorld\\\"!"; // "Hello\nWorld\"!"
1820        test_unescape_typical(input, "Hello\nWorld\"!")
1821    }
1822
1823    #[test]
1824    fn test_truncated_unicode() {
1825        let input = "Trunc: \\u12"; // too short
1826        let it = unescape(input);
1827        let mut found = false;
1828        for r in it {
1829            match r {
1830                Ok(_) => continue,
1831                Err(UnescapeError {
1832                    kind: UnescapeErrorKind::UnexpectedEof,
1833                    offset: 4,
1834                }) => {
1835                    found = true;
1836                    break;
1837                }
1838                Err(_) => break,
1839            }
1840        }
1841        assert!(found);
1842
1843        // Let's test explicit regardless
1844        assert_eq!(
1845            explicit::unescape(input).next(),
1846            Some(Err(UnescapeError {
1847                kind: UnescapeErrorKind::UnexpectedEof,
1848                offset: 4,
1849            }))
1850        );
1851    }
1852
1853    // ===================== Chunk_Eq ===================== //
1854
1855    #[test]
1856    fn test_empty_iterators_are_equal() {
1857        let a: Vec<&[u8]> = vec![];
1858        let b: Vec<&[u8]> = vec![];
1859        assert!(chunks_eq(a.into_iter(), b.into_iter()));
1860    }
1861
1862    #[test]
1863    fn test_empty_vs_non_empty() {
1864        let a: Vec<&[u8]> = vec![];
1865        let b = vec![&[1, 2, 3]];
1866        assert!(!chunks_eq(a.into_iter(), b.into_iter()));
1867
1868        // And the other way around
1869        let a = vec![&[1, 2, 3]];
1870        let b: Vec<&[u8]> = vec![];
1871        assert!(!chunks_eq(a.into_iter(), b.into_iter()));
1872    }
1873
1874    #[test]
1875    fn test_single_identical_chunks() {
1876        let a = vec!["hello world"];
1877        let b = vec!["hello world"];
1878        assert!(chunks_eq(a.into_iter(), b.into_iter()));
1879    }
1880
1881    #[test]
1882    fn test_different_chunk_boundaries_str() {
1883        // This is the key test: the concatenated content is identical,
1884        // but the chunk divisions are different.
1885        let a = vec!["he", "llo", " ", "world"];
1886        let b = vec!["hello ", "wo", "rld"];
1887        assert!(chunks_eq(a.into_iter(), b.into_iter()));
1888    }
1889
1890    #[test]
1891    fn test_different_chunk_boundaries_bytes() {
1892        let a = vec![&[1, 2], &[3, 4, 5][..]];
1893        let b = vec![&[1, 2, 3], &[4, 5][..]];
1894        assert!(chunks_eq(a.into_iter(), b.into_iter()));
1895    }
1896
1897    #[test]
1898    fn test_one_long_vs_many_short() {
1899        let a = vec!["a-long-single-chunk"];
1900        let b = vec!["a", "-", "long", "-", "single", "-", "chunk"];
1901        assert!(chunks_eq(a.into_iter(), b.into_iter()));
1902    }
1903
1904    #[test]
1905    fn test_unequal_content_same_length() {
1906        let a = vec!["hello"];
1907        let b = vec!["hallo"];
1908        assert!(!chunks_eq(a.into_iter(), b.into_iter()));
1909    }
1910
1911    #[test]
1912    fn test_unequal_at_chunk_boundary() {
1913        let a = vec!["ab", "c"]; // "abc"
1914        let b = vec!["ab", "d"]; // "abd"
1915        assert!(!chunks_eq(a.into_iter(), b.into_iter()));
1916    }
1917
1918    #[test]
1919    fn test_one_is_prefix_of_other() {
1920        // a is shorter
1921        let a = vec!["user", "name"]; // "username"
1922        let b = vec!["user", "name", "123"]; // "username123"
1923        assert!(!chunks_eq(a.into_iter(), b.into_iter()));
1924
1925        // b is shorter
1926        let a = vec!["user", "name", "123"];
1927        let b = vec!["user", "name"];
1928        assert!(!chunks_eq(a.into_iter(), b.into_iter()));
1929    }
1930
1931    #[test]
1932    fn test_complex_remainer_logic() {
1933        // This tests the carry-over logic extensively.
1934        // a: [1,2,3], [4,5], [6,7,8,9], [10]
1935        // b: [1,2], [3,4,5,6], [7,8], [9,10]
1936        let a = vec![&[1, 2, 3], &[4, 5][..], &[6, 7, 8, 9], &[10]];
1937        let b = vec![&[1, 2], &[3, 4, 5, 6][..], &[7, 8], &[9, 10]];
1938        assert!(chunks_eq(a.into_iter(), b.into_iter()));
1939    }
1940
1941    #[test]
1942    fn test_with_vec_references() {
1943        let v_a1 = vec![1, 2];
1944        let v_a2 = vec![3, 4, 5];
1945        let a_data = vec![&v_a1, &v_a2];
1946
1947        let v_b1 = vec![1, 2, 3];
1948        let v_b2 = vec![4, 5];
1949        let b_data = vec![&v_b1, &v_b2];
1950        assert!(chunks_eq(a_data.into_iter(), b_data.into_iter()));
1951    }
1952
1953    // ===================== Unescape Read ===================== //
1954    #[test]
1955    fn bytes_provenance() {
1956        // Input chosen so we hit the "final literal" branch and then try to backtrack.
1957        let input = b"hello";
1958        let mut iter = explicit::unescape(input);
1959
1960        // First call yields the entire "hello" as one literal chunk.
1961        let chunk = iter.next().unwrap().unwrap();
1962        assert_eq!(chunk.literal, b"hello");
1963
1964        // At this point, before the fix, `iter.bytes` would have been set to `&[]`
1965        // (not tied to `input`), so later pointer arithmetic could underflow.
1966        // After the fix, `iter.bytes` is `&input[input.len()..]`, which is safe.
1967        assert!(core::ptr::eq(iter.bytes, &input[input.len()..]));
1968
1969        // -- ESCAPE --
1970        let input = "hello";
1971        let mut iter = explicit::escape_str(input);
1972
1973        // First call yields the entire "hello" as one literal chunk.
1974        let chunk = iter.next().unwrap();
1975        assert_eq!(chunk.literal(), "hello");
1976
1977        // At this point, before the fix, `iter.bytes` would have been set to `&[]`
1978        // (not tied to `input`), so later pointer arithmetic could underflow.
1979        // After the fix, `iter.bytes` is `&input[input.len()..]`, which is safe.
1980        assert!(core::ptr::eq(
1981            unsafe { str::from_utf8_unchecked(iter.bytes) },
1982            &input[input.len()..]
1983        ));
1984
1985        // -- ESCAPE --
1986        let mut iter = escape_str(input);
1987
1988        // First call yields the entire "hello" as one literal chunk.
1989        let chunk = iter.next().unwrap();
1990        assert_eq!(chunk, "hello");
1991
1992        // At this point, before the fix, `iter.bytes` would have been set to `&[]`
1993        // (not tied to `input`), so later pointer arithmetic could underflow.
1994        // After the fix, `iter.bytes` is `&input[input.len()..]`, which is safe.
1995        assert!(core::ptr::eq(
1996            unsafe { str::from_utf8_unchecked(iter.bytes) },
1997            &input[input.len()..]
1998        ))
1999    }
2000
2001    #[test]
2002    fn test_read_simple() {
2003        let input = br#"hello world"#;
2004        let mut reader = unescape(input);
2005        let mut buf = [0u8; 20];
2006
2007        let bytes_read = reader.read(&mut buf).unwrap();
2008
2009        assert_eq!(bytes_read, 11);
2010        assert_eq!(&buf[..bytes_read], b"hello world");
2011
2012        // Second read should return 0 (EOF)
2013        let bytes_read_eof = reader.read(&mut buf).unwrap();
2014        assert_eq!(bytes_read_eof, 0);
2015    }
2016
2017    #[test]
2018    fn test_read_with_simple_escapes() {
2019        let input = br#"hello\tworld\nline2"#;
2020        let mut reader = unescape(input);
2021        let mut buf = Vec::new();
2022
2023        reader.read_to_end(&mut buf).unwrap();
2024
2025        assert_eq!(buf, b"hello\tworld\nline2");
2026    }
2027
2028    #[test]
2029    fn test_read_into_small_buffer_multiple_calls() {
2030        let input = br#"this is a long string with no escapes"#;
2031        let mut reader = unescape(input);
2032        let mut buf = [0u8; 10];
2033        let mut result = Vec::new();
2034
2035        loop {
2036            match reader.read(&mut buf) {
2037                Ok(0) => break, // EOF
2038                Ok(n) => {
2039                    result.extend_from_slice(&buf[..n]);
2040                }
2041                Err(e) => panic!("Read error: {}", e),
2042            }
2043        }
2044
2045        assert_eq!(result, input);
2046    }
2047
2048    #[test]
2049    fn test_read_multibyte_char_across_buffer_boundary() {
2050        // The grinning face emoji 😀 is \uD83D\uDE00, which is 4 bytes in UTF-8: 0xF0 0x9F 0x98 0x80
2051        let input = br#"emoji: \uD83D\uDE00 is here"#;
2052        let mut reader = unescape(input);
2053
2054        // Buffer is small, forcing the 4-byte emoji to be written across multiple calls
2055        let mut buf = [0u8; 8];
2056        let mut result = Vec::new();
2057
2058        // First read: "emoji: " (7 bytes) + first byte of emoji
2059        let n1 = reader.read(&mut buf).unwrap();
2060        assert_eq!(n1, 8);
2061        assert_eq!(&buf[..n1], b"emoji: \xF0");
2062        result.extend_from_slice(&buf[..n1]);
2063
2064        // Second read: next 3 bytes of emoji + " is h"
2065        let n2 = reader.read(&mut buf).unwrap();
2066        assert_eq!(n2, 8);
2067        assert_eq!(&buf[..n2], b"\x9F\x98\x80 is h");
2068        result.extend_from_slice(&buf[..n2]);
2069
2070        // Third read: "ere"
2071        let n3 = reader.read(&mut buf).unwrap();
2072        assert_eq!(n3, 3);
2073        assert_eq!(&buf[..n3], b"ere");
2074        result.extend_from_slice(&buf[..n3]);
2075
2076        // Final read should be EOF
2077        let n4 = reader.read(&mut buf).unwrap();
2078        assert_eq!(n4, 0);
2079
2080        assert_eq!(result, b"emoji: \xF0\x9F\x98\x80 is here");
2081        assert_eq!(result, "emoji: 😀 is here".as_bytes());
2082    }
2083
2084    #[test]
2085    fn test_read_error_invalid_escape() {
2086        let input = br#"hello \q world"#;
2087        let mut reader = unescape(input);
2088        let mut buf = [0u8; 20];
2089
2090        let result = reader.read(&mut buf);
2091
2092        assert!(result.is_err());
2093        let err = result.unwrap_err();
2094        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
2095        assert!(err.to_string().contains("invalid escape"));
2096    }
2097
2098    #[test]
2099    fn test_read_error_lone_surrogate() {
2100        let input = br#"\uD83D rest of data seen"#; // High surrogate without a following low one
2101        let mut reader = unescape(input);
2102        let mut buf = [0u8; 10];
2103
2104        let err = reader.read(&mut buf).unwrap_err();
2105        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
2106        assert!(err.to_string().contains("lone surrogate"));
2107    }
2108
2109    #[test]
2110    fn test_read_empty_input() {
2111        let input = b"";
2112        let mut reader = unescape(input);
2113        let mut buf = [0u8; 10];
2114        let bytes_read = reader.read(&mut buf).unwrap();
2115        assert_eq!(bytes_read, 0);
2116    }
2117
2118    #[test]
2119    fn test_read_into_empty_buffer() {
2120        let input = b"hello";
2121        let mut reader = unescape(input);
2122        let mut buf = [0u8; 0];
2123        let bytes_read = reader.read(&mut buf).unwrap();
2124        // A read into an empty buffer should always succeed and return 0.
2125        assert_eq!(bytes_read, 0);
2126    }
2127
2128    #[test]
2129    fn test_read_to_end_optimized() {
2130        let input = br#"first\nsecond\tthird \uD83D\uDE00 last"#;
2131        let mut reader = unescape(input);
2132        let mut buf = Vec::new();
2133
2134        let bytes_read = reader.read_to_end(&mut buf).unwrap();
2135        let expected = b"first\nsecond\tthird \xF0\x9F\x98\x80 last";
2136
2137        assert_eq!(bytes_read, expected.len());
2138        assert_eq!(buf, expected);
2139    }
2140
2141    // ===================== Unescape Display ===================== //
2142
2143    fn assert_display(display: impl Display, want: Result<&str, ()>) {
2144        let mut w = String::new();
2145        let res = fmt::write(&mut w, format_args!("{display}"));
2146
2147        match want {
2148            Ok(want) => {
2149                assert!(res.is_ok());
2150                assert_eq!(w, want)
2151            }
2152            Err(_) => assert!(
2153                res.is_err(),
2154                "strict mode should return Err on invalid bytes"
2155            ),
2156        }
2157    }
2158
2159    // -- NON-LOSSY TESTS (must be perfect) --
2160
2161    #[test]
2162    fn test_display_simple_string() {
2163        let display = unescape("hello world").display_utf8();
2164        assert_display(display, Ok("hello world"));
2165    }
2166
2167    #[test]
2168    fn test_display_empty_string() {
2169        assert_display(unescape("").display_utf8(), Ok(""));
2170    }
2171
2172    #[test]
2173    fn test_display_standard_escapes() {
2174        let input = br#"\" \\ \/ \b \f \n \r \t"#;
2175        let expected = "\" \\ / \x08 \x0C \n \r \t";
2176        assert_display(unescape(input).display_utf8(), Ok(expected));
2177    }
2178
2179    #[test]
2180    fn test_display_non_escaped_utf8() {
2181        let input = "你好, world".as_bytes();
2182        let expected = "你好, world";
2183        assert_display(unescape(input).display_utf8(), Ok(expected));
2184    }
2185
2186    #[test]
2187    fn test_display_unicode_escape_bmp() {
2188        // cent sign: \u00A2 -> C2 A2 (2 bytes)
2189        let input = br"a\u00A2b";
2190        let expected = "a¢b";
2191        assert_display(unescape(input).display_utf8(), Ok(expected));
2192    }
2193
2194    #[test]
2195    fn test_display_mixed_content() {
2196        let input = br#"Text with \n, \u00A2, and \uD83D\uDE0E emojis."#;
2197        let expected = "Text with \n, ¢, and 😎 emojis.";
2198        assert_display(unescape(input).display_utf8(), Ok(expected));
2199    }
2200
2201    #[test]
2202    fn test_display_starts_and_ends_with_escape() {
2203        let input = br#"\u00A2hello\t"#;
2204        let expected = "¢hello\t";
2205        assert_display(unescape(input).display_utf8(), Ok(expected));
2206    }
2207
2208    // -- NON-LOSSY ERROR TESTS --
2209
2210    #[test]
2211    fn test_display_err_invalid_escape() {
2212        assert_display(unescape(br"hello \z world").display_utf8(), Err(()));
2213    }
2214
2215    #[test]
2216    fn test_display_err_incomplete_unicode() {
2217        assert_display(unescape(br"\u123").display_utf8(), Err(()));
2218    }
2219
2220    #[test]
2221    fn test_display_err_invalid_hex_in_unicode() {
2222        assert_display(unescape(br"\u123g").display_utf8(), Err(()));
2223    }
2224
2225    #[test]
2226    fn test_display_err_lone_high_surrogate() {
2227        assert_display(unescape(br"\uD800").display_utf8(), Err(()));
2228    }
2229
2230    #[test]
2231    fn test_display_err_high_surrogate_not_followed_by_low() {
2232        assert_display(unescape(br"\uD800\uABCD").display_utf8(), Err(()));
2233    }
2234
2235    #[test]
2236    fn test_display_err_invalid_source_utf8() {
2237        // A valid UTF-8 sequence for 'h' followed by an invalid byte
2238        assert_display(unescape(b"h\x80ello").display_utf8(), Err(()));
2239    }
2240
2241    #[test]
2242    fn strict_valid_multi_byte_split() {
2243        // "€" U+20AC => bytes [0xE2, 0x82, 0xAC]
2244        let input = &[0xE2, 0x82, 0xAC];
2245        let display = unescape(input).display_utf8();
2246        assert_display(display, Ok("€"));
2247    }
2248
2249    #[test]
2250    fn strict_errors_on_invalid_start_byte() {
2251        let input = &[0xFF, b'a'];
2252        let display = unescape(input).display_utf8();
2253
2254        assert_display(display, Err(()));
2255    }
2256
2257    // -- LOSSY TESTS --
2258
2259    #[test]
2260    fn lossy_replaces_invalid_start_byte() {
2261        // 0xFF is invalid as a leading UTF-8 byte.
2262        let input = &[0xFF, b'a']; // invalid byte then ASCII 'a';
2263        let display = unescape(input).display_utf8_lossy();
2264        // replacement char + 'a'
2265        assert_display(display, Ok("\u{FFFD}a"));
2266    }
2267
2268    #[test]
2269    fn lossy_handles_trailing_incomplete_bytes() {
2270        // A trailing incomplete 3-byte sequence: [0xE2, 0x82] (missing 0xAC)
2271        let input: &[u8] = &[0xE2, 0x82];
2272        let display = unescape(input).display_utf8_lossy();
2273        // Should replace incomplete tail with U+FFFD.
2274        assert_display(display, Ok("\u{FFFD}"));
2275    }
2276
2277    #[test]
2278    fn test_display_lossy_invalid_source_utf8() {
2279        // The invalid byte sequence should be replaced.
2280        let input = b"valid\xF0\x90\x80invalid";
2281        let expected = "valid\u{FFFD}invalid";
2282        assert_display(unescape(input).display_utf8_lossy(), Ok(expected));
2283    }
2284
2285    #[test]
2286    fn test_display_lossy_invalid_escape_truncates() {
2287        // In lossy mode, an invalid JSON escape stops the processing.
2288        let input = br"this is ok \n but this is not \z";
2289        let expected = "this is ok \n";
2290        assert_display(unescape(input).display_utf8_lossy(), Ok(expected));
2291    }
2292
2293    #[test]
2294    fn test_display_lossy_incomplete_unicode_truncates() {
2295        let input = br"truncate after \n \uD83D";
2296        let expected = "truncate after \n";
2297        assert_display(unescape(input).display_utf8_lossy(), Ok(expected));
2298    }
2299
2300    // Inspired by and copied from memchr
2301    #[test]
2302    fn sync_regression() {
2303        use core::panic::{RefUnwindSafe, UnwindSafe};
2304
2305        fn assert_send_sync<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
2306        assert_send_sync::<Unescape<'_>>();
2307        assert_send_sync::<Escape<'_>>();
2308    }
2309}
2310
#[cfg(test)]
mod find_escape_char_tests {
    //! Tests for `find_escape_char`, which is expected to return the index of the
    //! first byte that requires escaping, or `None` when there is no such byte.
    //!
    //! The cases are organised around the 16-byte chunk size referenced by the
    //! SIMD-oriented tests below: inputs shorter than, exactly equal to, and
    //! longer than one chunk, plus escapes placed on either side of a chunk
    //! boundary.

    use std::format;

    use super::{ESCAPE_DECISION_TABLE, find_escape_char};

    /// Runs `find_escape_char` over the bytes of `input` and compares the result
    /// with `expected`.
    ///
    /// `case_name` is included in the failure message so a failing assertion
    /// identifies which input was responsible.
    fn run_test(input: &str, expected: Option<usize>, case_name: &str) {
        let result = find_escape_char(input.as_bytes());
        assert_eq!(result, expected, "Failed test case: '{}'", case_name);
    }

    /// Inputs without any escapable byte must yield `None`.
    #[test]
    fn test_no_escapes() {
        run_test("", None, "Empty string");
        run_test("Hello, world!", None, "Simple ASCII");

        // A clean input that is exactly one 16-byte chunk long. The length is
        // asserted so the fixture cannot silently drift away from its label.
        let one_chunk = "Exactly 16 bytes";
        assert_eq!(one_chunk.len(), 16, "fixture must be exactly 16 bytes");
        run_test(one_chunk, None, "16-byte ASCII");

        // One byte short of two full chunks: 16 bytes plus a 15-byte remainder.
        let almost_two_chunks = "Thirty-one bytes of plain ASCII";
        assert_eq!(almost_two_chunks.len(), 31, "fixture must be exactly 31 bytes");
        run_test(almost_two_chunks, None, "31-byte ASCII");

        run_test(
            "This string is over 16 bytes long now",
            None,
            "Over 16-byte ASCII",
        );

        // The original source of the bug: non-ASCII UTF-8 characters.
        // This ensures the signedness bug is truly fixed.
        run_test("Hello, éàçüö!", None, "Non-ASCII UTF-8");
        run_test("Testing with emojis 😀❤️✅", None, "Emojis");
    }

    /// A single escapable byte is reported at its exact index, wherever it sits.
    #[test]
    fn test_single_escapes() {
        run_test("\"", Some(0), "Quote at start");
        run_test("Hello \" world", Some(6), "Quote in middle");
        run_test("Hello\\", Some(5), "Backslash at end");
        run_test("\n", Some(0), "Control char (newline) at start");
        run_test("Hello\tworld", Some(5), "Control char (tab) in middle");
        run_test(
            "Control char at end\u{08}",
            Some(19),
            "Control char (backspace) at end",
        );
    }

    /// With several escapable bytes present, the earliest index wins.
    #[test]
    fn test_finds_first_of_multiple() {
        // This confirms it always finds the *first* match, not a later one.
        run_test("a\"b\\c\nd", Some(1), "Finds first quote");
        run_test("ab\\c\"d\ne", Some(2), "Finds first backslash");
        run_test("abc\nd\"e\\f", Some(3), "Finds first control char");
        run_test("\"\n\\", Some(0), "Multiple escapes at start");
    }

    /// Escapes positioned immediately around 16-byte chunk boundaries.
    #[test]
    fn test_simd_chunk_boundaries() {
        // These tests are critical for verifying the SIMD logic. A chunk is 16 bytes.
        let s15 = "a".repeat(15);
        let s16 = "a".repeat(16);
        let s17 = "a".repeat(17);

        // Escape at the exact end of the first 16-byte chunk
        run_test(&format!("{}\"", s15), Some(15), "Escape at index 15");

        // Escape at the exact start of the second 16-byte chunk
        run_test(&format!("{}\n", s16), Some(16), "Escape at index 16");

        // Escape within the second chunk
        run_test(&format!("{}\t", s17), Some(17), "Escape at index 17");

        // A long string with an escape several chunks in. Two backslashes are
        // appended; the first one (index 40) is the one that must be reported.
        let long = "a".repeat(40);
        run_test(
            &format!("{}\\\\", long),
            Some(40),
            "Escape deep in a long string",
        );
    }

    /// Inputs whose length is not a multiple of 16 bytes.
    #[test]
    fn test_remainder_logic() {
        // These tests ensure the scalar fallback logic works correctly for inputs
        // that are not a multiple of 16 bytes long.

        // String shorter than 16 bytes
        run_test("short\nstring", Some(5), "Short string with escape");
        run_test("no escapes", None, "Short string no escape");

        // String with 17 bytes (16 for SIMD, 1 for remainder)
        let s16 = "a".repeat(16);
        run_test(
            &format!("{}\"", s16),
            Some(16),
            "Escape in 1-byte remainder",
        );

        // String with 31 bytes (16 for SIMD, 15 for remainder)
        let s15 = "b".repeat(15);
        run_test(
            &format!("{}{}\t", s15, s15),
            Some(30),
            "Escape at end of 15-byte remainder",
        );
    }

    /// Exhaustive check of every byte value against `ESCAPE_DECISION_TABLE`.
    #[test]
    fn test_all_escapable_bytes_individually() {
        // This is the ultimate test. It iterates through all 256 possible byte values
        // and confirms that our function's decision matches the ESCAPE_DECISION_TABLE.
        let prefix = "0123456789abcdef"; // A 16-byte safe prefix to engage the SIMD loop.

        for byte_val in 0..=255u8 {
            // We can't create a &str from invalid UTF-8, so we work with byte slices.
            let mut test_bytes = prefix.as_bytes().to_vec();
            test_bytes.push(byte_val);

            let result = find_escape_char(&test_bytes);
            // A table entry of 1 marks a byte that must be escaped.
            let expected_to_escape = ESCAPE_DECISION_TABLE[usize::from(byte_val)] == 1;

            if expected_to_escape {
                // If this byte SHOULD be escaped, we expect to find it at index 16,
                // i.e. directly after the 16-byte prefix.
                assert_eq!(
                    result,
                    Some(16),
                    "Failed to find required escape for byte 0x{:02X}",
                    byte_val
                );
            } else {
                // If this byte should NOT be escaped, we expect to find nothing.
                assert_eq!(
                    result, None,
                    "Incorrectly found an escape for byte 0x{:02X}",
                    byte_val
                );
            }
        }
    }
}