json_escape/explicit.rs
1//! More explicit and fine-grained iterators for JSON escaping and unescaping.
2//!
3//! This module provides an alternative API to the one in the crate root. While the
4//! root API yields slices (`&str` or `&[u8]`) that represent the final output,
5//! this module's iterators yield "chunk" structs. These structs distinguish between
6//! parts of the input that were processed literally and the specific characters
7//! that were escaped or unescaped.
8//!
9//! This approach offers several advantages:
10//! - **Greater Control**: You can inspect each component of the transformation,
11//! which can be useful for debugging, logging, or more complex data processing.
12//! - **Potential Performance**: By avoiding the need to look up single-byte escape
13//! sequences in a table on every iteration, some workflows may see a minor
14//! performance improvement.
15//! - **Clarity**: The structure of the output more closely reflects the transformation
16//! process, which can make the logic easier to follow.
17//!
18//! # Example: Escaping
19//!
20//! ```
21//! use json_escape::explicit::escape_str;
22//!
23//! let mut escaper = escape_str("a\nb");
24//!
25//! // The first chunk contains the literal "a" and the escaped newline.
26//! let chunk1 = escaper.next().unwrap();
27//! assert_eq!("a", chunk1.literal());
28//! assert_eq!(Some(r#"\n"#), chunk1.escaped());
29//!
30//! // The second chunk contains the literal "b" and no escaped sequence.
31//! let chunk2 = escaper.next().unwrap();
32//! assert_eq!("b", chunk2.literal());
33//! assert_eq!(None, chunk2.escaped());
34//!
35//! // The iterator is now exhausted.
36//! assert!(escaper.next().is_none());
37//! ```
38//!
39//! # Example: Unescaping
40//!
41//! ```
42//! use json_escape::explicit::unescape;
43//!
44//! let mut unescaper = unescape(br"hello\tworld");
45//!
46//! // The first chunk contains the literal "hello" and the unescaped tab.
47//! let chunk1 = unescaper.next().unwrap().unwrap();
48//! assert_eq!(b"hello", chunk1.literal());
49//! assert_eq!(Some('\t'), chunk1.unescaped());
50//!
51//! // The second chunk contains the literal "world" and no unescaped character.
52//! let chunk2 = unescaper.next().unwrap().unwrap();
53//! assert_eq!(b"world", chunk2.literal());
54//! assert_eq!(None, chunk2.unescaped());
55//!
56//! // The iterator is now exhausted.
57//! assert!(unescaper.next().is_none());
58//! ```
59//!
60//! Both `Escape` and `Unescape` iterators provide `display` helpers for easy integration
61//! with Rust's formatting system, preserving the zero-allocation benefits of the main API.
62
63#[cfg(feature = "alloc")]
64use crate::DecodeUtf8Error;
65use crate::{ESCAPE_TABLE, InvalidHexError, LoneSurrogateError, UnescapeError, display_bytes_utf8};
66use crate::{InvalidEscapeError, UnescapeErrorKind, find_escape_char};
67use core::fmt;
68use core::iter::FusedIterator;
69use core::str;
70
71#[cfg(feature = "alloc")]
72use alloc::{borrow::Cow, string::String, vec::Vec};
73
74//==============================================================================
75// Escaping
76//==============================================================================
77
78/// Creates an iterator that yields chunks of an escaped JSON string.
79///
80/// See the [module-level documentation](self) for more details.
81#[inline]
82pub fn escape_str(s: &str) -> Escape<'_> {
83 Escape {
84 bytes: s.as_bytes(),
85 }
86}
87
/// One step of JSON escaping: a literal run followed by at most one escape sequence.
///
/// Values of this type are produced by the [`Escape`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EscapedChunk<'a> {
    /// Portion of the original input that could be copied through unchanged.
    literal: &'a str,
    /// Escape sequence (for example `r#"\n"#` or `r#"\""#`) emitted right after `literal`.
    /// `None` when the chunk is the final one and nothing after the literal needed escaping.
    escaped: Option<&'static str>,
}
99
100impl<'a> EscapedChunk<'a> {
101 /// Returns the literal part of the chunk, which is a slice of the original string.
102 #[inline]
103 pub const fn literal(&self) -> &'a str {
104 self.literal
105 }
106
107 /// Returns the escaped part of the chunk, if any.
108 #[inline]
109 pub const fn escaped(&self) -> Option<&'static str> {
110 self.escaped
111 }
112}
113
114impl<'a> fmt::Display for EscapedChunk<'a> {
115 #[inline]
116 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
117 f.write_str(self.literal)?;
118 if let Some(s) = self.escaped {
119 f.write_str(s)?;
120 }
121 Ok(())
122 }
123}
124
/// Lazy escaper that turns its input into a sequence of [`EscapedChunk`]s.
///
/// Obtain one through [`escape_str`].
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Escape<'a> {
    /// Input bytes that have not been turned into chunks yet.
    pub(crate) bytes: &'a [u8],
}
133
134impl<'a> Iterator for Escape<'a> {
135 type Item = EscapedChunk<'a>;
136
137 #[inline]
138 fn next(&mut self) -> Option<Self::Item> {
139 if self.bytes.is_empty() {
140 return None;
141 }
142
143 let pos = find_escape_char(self.bytes).unwrap_or(self.bytes.len());
144 let (literal_bytes, rest) = self.bytes.split_at(pos);
145
146 // SAFETY: `find_escape_char` guarantees `pos` is on a UTF-8 boundary.
147 let literal = unsafe { str::from_utf8_unchecked(literal_bytes) };
148
149 if rest.is_empty() {
150 self.bytes = &self.bytes[self.bytes.len()..];
151 Some(EscapedChunk {
152 literal,
153 escaped: None,
154 })
155 } else {
156 let escaped_char_byte = rest[0];
157 self.bytes = &rest[1..];
158 Some(EscapedChunk {
159 literal,
160 escaped: Some(
161 ESCAPE_TABLE[escaped_char_byte as usize]
162 .expect("find_escape_char found a byte not in ESCAPE_TABLE"),
163 ),
164 })
165 }
166 }
167
168 fn size_hint(&self) -> (usize, Option<usize>) {
169 if self.bytes.is_empty() {
170 (0, Some(0))
171 } else {
172 // We'll yield at least 1 chunk, and at most `len` chunks if every byte is escaped.
173 (1, Some(self.bytes.len()))
174 }
175 }
176}
177
178impl<'a> FusedIterator for Escape<'a> {}
179
180impl<'a> fmt::Display for Escape<'a> {
181 /// This allows the escaped output to be written directly to a formatter
182 /// without intermediate allocation.
183 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184 for chunk in self.clone() {
185 write!(f, "{chunk}")?;
186 }
187 Ok(())
188 }
189}
190
191impl fmt::Debug for Escape<'_> {
192 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
193 f.debug_struct("Escape").finish_non_exhaustive()
194 }
195}
196
197impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Escape<'_> {
198 /// Compares the escaped output with any byte-slice-like object.
199 ///
200 /// This is a convenience for testing, allowing you to check the fully
201 /// concatenated result of an `Escape` iterator against a known `&str` or `&[u8]`.
202 fn eq(&self, other: &B) -> bool {
203 let mut other = other.as_ref();
204 for chunk in self.clone() {
205 // Check literal part
206 if !other.starts_with(chunk.literal.as_bytes()) {
207 return false;
208 }
209 other = &other[chunk.literal.len()..];
210
211 // Check escaped part
212 if let Some(escaped_str) = chunk.escaped {
213 if !other.starts_with(escaped_str.as_bytes()) {
214 return false;
215 }
216 other = &other[escaped_str.len()..];
217 }
218 }
219 other.is_empty()
220 }
221}
222
223impl<'a, 'b> PartialEq<Escape<'a>> for Escape<'b> {
224 /// Compares two `Escape` iterators for equality.
225 ///
226 /// Two `Escape` iterators are considered equal if they'll produce the same **output**.
227 /// It first performs a fast check on the underlying byte slices.
228 fn eq(&self, other: &Escape<'a>) -> bool {
229 // The crate parallel is easier
230 crate::Escape { bytes: self.bytes } == crate::Escape { bytes: other.bytes }
231 }
232}
233
#[cfg(feature = "alloc")]
impl<'a> From<Escape<'a>> for Cow<'a, str> {
    /// Efficiently collects the escaped parts into a `Cow<'a, str>`.
    ///
    /// - If the input is empty or requires **no escaping**, the result is
    ///   `Cow::Borrowed` pointing at the original string data.
    /// - Otherwise a `String` is allocated and returned as `Cow::Owned`.
    fn from(mut iter: Escape<'a>) -> Self {
        let first = match iter.next() {
            Some(chunk) => chunk,
            None => return Cow::Borrowed(""),
        };

        // A first chunk without an escape is also the last chunk: the iterator
        // only omits the escape when the literal runs to the end of the input.
        let escaped = match first.escaped {
            Some(sequence) => sequence,
            None => return Cow::Borrowed(first.literal),
        };

        // `iter.bytes` only covers what is left *after* the first chunk, so the
        // first chunk has to be counted explicitly. The extra 16 bytes leave
        // room for a few more escape sequences before the buffer must grow.
        let capacity = first.literal.len() + escaped.len() + iter.bytes.len() + 16;
        let mut out = String::with_capacity(capacity);
        out.push_str(first.literal);
        out.push_str(escaped);
        out.extend(iter);
        Cow::Owned(out)
    }
}
261
262//==============================================================================
263// Unescaping
264//==============================================================================
265
266/// Creates an iterator that yields chunks of an unescaped JSON string.
267///
268/// See the [module-level documentation](self) for more details.
269#[inline]
270pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
271 Unescape {
272 bytes: input.as_ref(),
273 }
274}
275
276/// Creates a streaming JSON string unescaper that handles enclosing quotes.
277///
278/// This function is a convenience wrapper around [`unescape`]. If the input byte
279/// slice starts and ends with a double-quote (`"`), the quotes are trimmed
280/// before the content is unescaped.
281///
282/// If the input is not enclosed in quotes, this function behaves identically to
283/// [`unescape`].
284///
285/// # Examples
286///
287/// ```
288/// use json_escape::explicit::unescape_quoted;
289///
290/// // An input string with quotes and an escaped tab.
291/// let bytes = br#""\tline""#;
292/// let mut unescaper = unescape_quoted(bytes);
293///
294/// // The first chunk is the unescaped tab character.
295/// let chunk1 = unescaper.next().unwrap().unwrap();
296/// assert_eq!(b"", chunk1.literal());
297/// assert_eq!(Some('\t'), chunk1.unescaped());
298///
299/// // The second chunk is the literal "line".
300/// let chunk2 = unescaper.next().unwrap().unwrap();
301/// assert_eq!(b"line", chunk2.literal());
302/// assert_eq!(None, chunk2.unescaped());
303///
304/// // The iterator is now exhausted.
305/// assert!(unescaper.next().is_none());
306/// ```
307#[inline]
308pub fn unescape_quoted(bytes: &[u8]) -> Unescape<'_> {
309 let inner = if bytes.len() >= 2 && bytes.first() == Some(&b'"') && bytes.last() == Some(&b'"') {
310 &bytes[1..bytes.len() - 1]
311 } else {
312 bytes
313 };
314 unescape(inner)
315}
316
/// One step of JSON unescaping: a literal run followed by at most one decoded character.
///
/// Values of this type are produced by the [`Unescape`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnescapedChunk<'a> {
    /// Portion of the original input that contained no escape sequence.
    pub(crate) literal: &'a [u8],
    /// Character decoded from the escape sequence that followed `literal`.
    /// `None` when the chunk is the final one and no escape follows the literal.
    pub(crate) unescaped: Option<char>,
}
328
329impl<'a> UnescapedChunk<'a> {
330 /// Returns the literal part of the chunk, which is a slice of the original bytes.
331 #[inline]
332 pub const fn literal(&self) -> &'a [u8] {
333 self.literal
334 }
335
336 /// Returns the unescaped character, if any.
337 #[inline]
338 pub const fn unescaped(&self) -> Option<char> {
339 self.unescaped
340 }
341
342 /// Returns a displayable wrapper that will format the chunk as a UTF-8 string.
343 ///
344 /// If the literal part of the chunk contains invalid UTF-8 sequences, this
345 /// will result in a `fmt::Error`.
346 pub fn display_utf8(&self) -> DisplayUnescapedChunk<'_> {
347 DisplayUnescapedChunk {
348 chunk: self,
349 lossy: false,
350 }
351 }
352
353 /// Returns a displayable wrapper that will format the chunk as a lossy UTF-8 string.
354 ///
355 /// Any invalid UTF-8 sequences in the literal part of the chunk will be
356 /// replaced with the U+FFFD replacement character.
357 pub fn display_utf8_lossy(&self) -> DisplayUnescapedChunk<'_> {
358 DisplayUnescapedChunk {
359 chunk: self,
360 lossy: true,
361 }
362 }
363}
364
365/// Helper struct for safely displaying an [`UnescapedChunk`].
366pub struct DisplayUnescapedChunk<'a> {
367 chunk: &'a UnescapedChunk<'a>,
368 lossy: bool,
369}
370
371impl<'a> fmt::Display for DisplayUnescapedChunk<'a> {
372 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
373 display_bytes_utf8(self.chunk.literal, f, self.lossy)?;
374 if let Some(c) = self.chunk.unescaped {
375 use fmt::Write as _;
376
377 f.write_char(c)?;
378 }
379 Ok(())
380 }
381}
382
/// Lazy unescaper that turns its input bytes into a sequence of [`UnescapedChunk`]s.
///
/// Obtain one through [`unescape`].
#[derive(Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Unescape<'a> {
    /// Input bytes that have not been turned into chunks yet.
    pub(crate) bytes: &'a [u8],
}
391
392impl<'a> Iterator for Unescape<'a> {
393 type Item = Result<UnescapedChunk<'a>, UnescapeError>;
394
395 #[inline]
396 fn next(&mut self) -> Option<Self::Item> {
397 use memchr::memchr;
398
399 if self.bytes.is_empty() {
400 return None;
401 }
402
403 let pos = match memchr(b'\\', self.bytes) {
404 Some(p) => p,
405 None => {
406 // No more backslashes, yield the rest as a final literal chunk.
407 let chunk = UnescapedChunk {
408 literal: self.bytes,
409 unescaped: None,
410 };
411 self.bytes = &self.bytes[self.bytes.len()..]; // fix: totalk
412 return Some(Ok(chunk));
413 }
414 };
415
416 let (literal, rest) = self.bytes.split_at(pos);
417 // rest starts with '\\'
418 let mut remainder = &rest[1..];
419
420 let unescaped_char = match remainder.first() {
421 Some(b'u') => {
422 // Temporarily advance past 'u'
423 remainder = &remainder[1..];
424 // Use a helper from the main unescaper, giving it a mutable slice reference
425 // that it can advance.
426 match Self::handle_unicode_escape(&mut remainder) {
427 Ok(c) => c,
428 Err(e) => {
429 // FIX: handle_unicode_escape_from_slice already handles this for us.
430 // Adjust offset: error is relative to `\u`, but we need it relative to chunk start.
431 return Some(Err(e));
432 }
433 }
434 }
435 Some(&byte) => {
436 remainder = &remainder[1..];
437 match UNESCAPE_TABLE[byte as usize] {
438 Some(c) => c,
439 None => {
440 return Some(Err(UnescapeError {
441 kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError {
442 found: byte,
443 }),
444 // The invalid character is 1 byte after '\'.
445 offset: 1,
446 }));
447 }
448 }
449 }
450 None => {
451 return Some(Err(UnescapeError {
452 kind: UnescapeErrorKind::UnexpectedEof,
453 // EOF occurred 1 byte after '\'.
454 offset: 1,
455 }));
456 }
457 };
458
459 self.bytes = remainder;
460 Some(Ok(UnescapedChunk {
461 literal,
462 unescaped: Some(unescaped_char),
463 }))
464 }
465
466 fn size_hint(&self) -> (usize, Option<usize>) {
467 if self.bytes.is_empty() {
468 (0, Some(0))
469 } else {
470 // Worst-case is \uXXXX -> 1 byte, so 6 -> 1.
471 (
472 self.bytes.len().saturating_add(1) / 6,
473 Some(self.bytes.len()),
474 )
475 }
476 }
477}
478
479impl<'a> FusedIterator for Unescape<'a> {}
480
481impl<'a> Unescape<'a> {
482 /// Decodes the unescaped byte stream into a UTF-8 string.
483 ///
484 /// This method consumes the iterator and collects all resulting byte chunks
485 /// into a `Cow<[u8]>`, which is then validated as UTF-8. If an unescaping
486 /// error occurs, it's returned immediately. If the final sequence of bytes
487 /// is not valid UTF-8, a UTF-8 error is returned.
488 ///
489 /// This is optimized to return a `Cow::Borrowed` if no escapes were present
490 /// in the input, avoiding allocation.
491 ///
492 /// **Requires the `alloc` feature.**
493 ///
494 /// # Example
495 ///
496 /// ```
497 /// # #[cfg(feature = "alloc")] {
498 /// use json_escape::explicit::unescape;
499 ///
500 /// let input = r#"Emoji: \uD83D\uDE00"#;
501 /// let cow = unescape(input).decode_utf8().unwrap();
502 ///
503 /// assert_eq!(cow, "Emoji: π");
504 /// # }
505 /// ```
506 #[cfg(feature = "alloc")]
507 pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
508 match self.try_into().map_err(DecodeUtf8Error::Unescape)? {
509 Cow::Borrowed(bytes) => str::from_utf8(bytes)
510 .map(Cow::Borrowed)
511 .map_err(DecodeUtf8Error::Utf8),
512 Cow::Owned(bytes) => String::from_utf8(bytes)
513 .map(Cow::Owned)
514 .map_err(|e| DecodeUtf8Error::Utf8(e.utf8_error())),
515 }
516 }
517
518 /// Decodes the unescaped byte stream lossily into a UTF-8 string.
519 ///
520 /// This is similar to [`Unescape::decode_utf8`] but replaces any invalid UTF-8 sequences
521 /// with the replacement character (`U+FFFD`) instead of returning an error.
522 ///
523 /// An `UnescapeError` can still be returned if the JSON escaping itself is invalid.
524 ///
525 /// **Requires the `alloc` feature.**
526 #[cfg(feature = "alloc")]
527 pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
528 use crate::decode_utf8_lossy;
529
530 Ok(decode_utf8_lossy(self.try_into()?))
531 }
532
533 /// Returns a wrapper that implements [`fmt::Display`].
534 ///
535 /// If an unescaping error or invalid UTF-8 sequence is encountered,
536 /// a `fmt::Error` is returned, which will cause `format!` and friends to panic.
537 pub fn display_utf8(self) -> DisplayUnescape<'a> {
538 DisplayUnescape {
539 inner: self,
540 lossy: false,
541 }
542 }
543
544 /// Returns a wrapper that implements [`fmt::Display` for lossy UTF-8 decoding.
545 ///
546 /// Invalid UTF-8 sequences will be replaced with the replacement character.
547 /// An unescaping error will still result in a `fmt::Error`.
548 pub fn display_utf8_lossy(self) -> DisplayUnescape<'a> {
549 DisplayUnescape {
550 inner: self,
551 lossy: true,
552 }
553 }
554
555 /// Parses a unicode escape sequence `\uXXXX` which may be a surrogate pair.
556 /// The input slice `bytes` must be positioned *after* the `\u`.
557 ///
558 /// On success, returns the parsed `char` and advances the slice.
559 /// On error, returns an `Err` and the input slice is not modified.
560 #[inline(always)]
561 fn handle_unicode_escape(bytes: &mut &'a [u8]) -> Result<char, UnescapeError> {
562 // Parse first 4 hex digits (\uXXXX)
563 //
564 // The slice starts *after* '\u'. The first hex digit is at offset 2 from '\'.
565 let first = Self::parse_hex4(bytes, 2)?;
566
567 // High surrogate β must be followed by another \uXXXX low surrogate
568 if (0xD800..=0xDBFF).contains(&first) {
569 let remaining = &bytes[4..];
570
571 const N: usize = b"\\u".len();
572
573 // EOF before even seeing '\' or 'u' β UnexpectedEof
574 if remaining.len() < N {
575 return Err(UnescapeError {
576 kind: UnescapeErrorKind::UnexpectedEof,
577 offset: 6,
578 });
579 }
580
581 // Check for a following `\u` and enough bytes for the second hex sequence.
582 if b"\\u" == &remaining[..N] {
583 // Try parsing the low surrogate. The slice is advanced by 2 for the `\u`.
584 // The first hex digit of the second escape is at offset 8.
585 // (\uXXXX\u -> 8 chars from the initial '\')
586 match Self::parse_hex4(&remaining[2..], 8) {
587 Ok(low) if (0xDC00..=0xDFFF).contains(&low) => {
588 // We found a valid low surrogate. Combine them.
589 let high_t = first as u32;
590 let low_t = low as u32;
591 let code = 0x10000 + (((high_t - 0xD800) << 10) | (low_t - 0xDC00));
592 let result_char = char::from_u32(code)
593 .expect("valid surrogate pair math should always produce a valid char");
594
595 // SUCCESS: Advance the original slice past the entire surrogate pair (\uXXXX\uXXXX).
596 *bytes = &remaining[6..]; // Consumes 4 + 2 + 4 = 10 bytes total from the original slice
597 return Ok(result_char);
598 }
599 Ok(_) => {
600 // Got a full escape but not a low surrogate β Lone surrogate
601 return Err(UnescapeError {
602 kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError {
603 surrogate: first,
604 }),
605 offset: 6,
606 });
607 }
608 Err(err) => {
609 // parse_hex4 failed for the second part.
610 return Err(err);
611 }
612 }
613 } else {
614 // High surrogate was not followed by a `\u` sequence.
615 return Err(UnescapeError {
616 kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError { surrogate: first }),
617 // The error is detected after consuming `\uXXXX` (6 bytes total from '\').
618 offset: 6,
619 });
620 }
621 }
622
623 // Not a surrogate β normal path
624 match char::from_u32(first as u32) {
625 Some(c) => {
626 // SUCCESS: Advance the original slice past the 4 hex digits.
627 *bytes = &bytes[4..];
628 Ok(c)
629 }
630 None => Err(UnescapeError {
631 // The parsed value is not a valid char (e.g., a lone low surrogate).
632 kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError { surrogate: first }),
633 // The error is detected after consuming `\uXXXX` (6 bytes total from '\').
634 offset: 6,
635 }),
636 }
637 }
638
639 /// Parses 4 hex digits, optimized for the success path.
640 #[inline(always)]
641 fn parse_hex4(slice: &[u8], base_offset: u8) -> Result<u16, UnescapeError> {
642 // --- HOT PATH ---
643 // This is the path we expect to take most of the time.
644 if let Some(chunk) = slice.get(..4) {
645 // By slicing to 4, we've performed a single bounds check.
646 // The compiler now knows any access from chunk[0] to chunk[3] is safe,
647 // so it will not generate additional bounds checks.
648
649 // We can now safely access the bytes.
650 let b0 = chunk[0];
651 let b1 = chunk[1];
652 let b2 = chunk[2];
653 let b3 = chunk[3];
654
655 // Use the LUT to get the values.
656 if let (Some(v0), Some(v1), Some(v2), Some(v3)) = (
657 HEX[b0 as usize],
658 HEX[b1 as usize],
659 HEX[b2 as usize],
660 HEX[b3 as usize],
661 ) {
662 // All characters are valid hex, combine and return.
663 let result = (v0 as u16) << 12 | (v1 as u16) << 8 | (v2 as u16) << 4 | (v3 as u16);
664 return Ok(result);
665 }
666
667 // If we're here, it means the slice was long enough, but one
668 // of the characters was not a valid hex digit. Fall through to the cold path
669 // to correctly identify which character was invalid.
670 }
671
672 // --- COLD PATH ---
673 // This path handles all errors. It's marked as `#[cold]` to hint to the
674 // compiler that it's less frequently executed.
675 #[cold]
676 fn handle_error(slice: &[u8], base_offset: u8) -> UnescapeError {
677 // Loop through the bytes we *do* have.
678 for (i, &b) in slice.iter().enumerate() {
679 if HEX[b as usize].is_none() {
680 // We found an invalid hex character before running out of bytes.
681 return UnescapeError {
682 kind: UnescapeErrorKind::InvalidHex(InvalidHexError { found: b }),
683 offset: base_offset + i as u8,
684 };
685 }
686 }
687
688 // If the loop completes, all available characters were valid,
689 // but there weren't enough of them.
690 UnescapeError {
691 kind: UnescapeErrorKind::UnexpectedEof,
692 // The error is at the position of the first *missing* character.
693 offset: base_offset + slice.len() as u8,
694 }
695 }
696
697 Err(handle_error(slice, base_offset))
698 }
699}
700
701impl fmt::Debug for Unescape<'_> {
702 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
703 f.debug_struct("Unescape").finish_non_exhaustive()
704 }
705}
706
707impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Unescape<'_> {
708 /// Compares the unescaped output with a byte-slice-like object.
709 ///
710 /// Returns `true` if the iterator successfully unescapes to produce a byte
711 /// sequence identical to `other`. If an error occurs, returns `false`.
712 fn eq(&self, other: &B) -> bool {
713 let mut other = other.as_ref();
714 let mut char_buf = [0u8; 4];
715
716 for result in self.clone() {
717 match result {
718 Ok(chunk) => {
719 // Check literal part
720 if !other.starts_with(chunk.literal) {
721 return false;
722 }
723 other = &other[chunk.literal.len()..];
724
725 // Check unescaped part
726 if let Some(c) = chunk.unescaped {
727 let char_bytes = c.encode_utf8(&mut char_buf);
728 if !other.starts_with(char_bytes.as_bytes()) {
729 return false;
730 }
731 other = &other[char_bytes.len()..];
732 }
733 }
734 Err(_) => return false, // An erroring iterator cannot be equal.
735 }
736 }
737 other.is_empty()
738 }
739}
740
741impl<B: AsRef<[u8]>> PartialEq<Unescape<'_>> for Result<B, UnescapeError> {
742 /// Compares the unescaper's outcome with a `Result`.
743 ///
744 /// This allows for precise testing of `Unescape` against either a
745 /// successful outcome (`Ok(bytes)`) or a specific failure (`Err(error)`).
746 fn eq(&self, unescape: &Unescape<'_>) -> bool {
747 match self {
748 Ok(expected_bytes) => unescape == expected_bytes,
749 Err(expected_error) => {
750 for result in unescape.clone() {
751 if let Err(actual_error) = result {
752 // The iterator's first error is its final outcome.
753 return actual_error == *expected_error;
754 }
755 }
756 // `unescape` completed successfully, but an error was expected.
757 false
758 }
759 }
760 }
761}
762
763impl<'a, 'b> PartialEq<Unescape<'a>> for Unescape<'b> {
764 /// Compares two `Unescape` iterators for equality based on their terminal result.
765 ///
766 /// The equality of two `Unescape` iterators is determined by the final `Result`
767 /// that would be obtained if each iterator were fully consumed (e.g., by using `try_collect()`).
768 ///
769 /// The specific rules are as follows:
770 ///
771 /// 1. **Error vs. Error**: If both iterators terminate with an `Err`, they are
772 /// considered **equal** if and only if their `UnescapeError`s are identical.
773 /// Any bytes successfully unescaped *before* the error are ignored in this case.
774 /// 2. **Success vs. Success**: If both iterators terminate with `Ok`, they are
775 /// considered **equal** if and only if the complete sequence of unescaped bytes
776 /// is identical for both.
777 /// 3. **Success vs. Error**: If one iterator terminates with `Ok` and the other
778 /// with `Err`, they are always **not equal**.
779 ///
780 /// # Example
781 ///
782 /// ```
783 /// use json_escape::explicit::unescape;
784 ///
785 /// // Case 1: Both iterators produce the same error. They are equal,
786 /// // even though their valid prefixes ("a" and "b") are different.
787 /// let failing_a = unescape(r#"a\k"#);
788 /// let failing_b = unescape(r#"b\k"#);
789 /// assert_eq!(failing_a, failing_b);
790 ///
791 /// // Case 2: Both iterators succeed. Equality depends on the byte stream.
792 /// let successful_a = unescape(r#"hello\nworld"#);
793 /// let successful_b = unescape(r#"hello\nworld"#);
794 /// assert_eq!(successful_a, successful_b);
795 ///
796 /// let successful_c = unescape(r#"different"#);
797 /// assert_ne!(successful_a, successful_c);
798 ///
799 /// // Case 3: One succeeds and one fails. They are not equal.
800 /// let succeeding = unescape(r#"stop"#);
801 /// let failing = unescape(r#"stop\k"#);
802 /// assert_ne!(succeeding, failing);
803 ///
804 /// // Case 4: Both iterators fail differently. They are not equal.
805 /// let failing_a = unescape(r#"data:\k"#);
806 /// let failing_b = unescape(r#"data:\"#);
807 /// assert_ne!(failing_a, failing_b);
808 /// ```
809 fn eq(&self, other: &Unescape<'a>) -> bool {
810 // The crate parallel is easier
811 crate::unescape(self.bytes) == crate::unescape(other.bytes)
812 }
813}
814
#[cfg(feature = "alloc")]
impl<'a> TryFrom<Unescape<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Efficiently collects the unescaped bytes into a `Cow<'a, [u8]>`.
    ///
    /// Returns `Cow::Borrowed` if the input is empty or contains no escape
    /// sequence, avoiding allocation. Otherwise returns `Cow::Owned`.
    ///
    /// # Errors
    ///
    /// The first `UnescapeError` yielded by the iterator is returned
    /// immediately; bytes collected up to that point are discarded.
    fn try_from(mut value: Unescape<'a>) -> Result<Self, Self::Error> {
        // Appends the UTF-8 encoding of `c` by growing the buffer and encoding
        // in place, without an intermediate stack buffer.
        fn push_char(buf: &mut Vec<u8>, c: char) {
            let old_len = buf.len();
            buf.resize(old_len + c.len_utf8(), 0);
            c.encode_utf8(&mut buf[old_len..]);
        }

        let first = match value.next() {
            None => return Ok(Cow::Borrowed(b"")),
            Some(item) => item?,
        };

        // A first chunk without an unescaped character is also the only chunk.
        let first_char = match first.unescaped {
            None => return Ok(Cow::Borrowed(first.literal)),
            Some(c) => c,
        };

        // `value.bytes` only covers what is left *after* the first chunk, so
        // the first chunk is counted explicitly. Every escape handled by the
        // iterator decodes to fewer bytes than it occupies in the input, so
        // the remaining input length bounds the remaining output.
        let capacity = first.literal.len() + first_char.len_utf8() + value.bytes.len();
        let mut buf = Vec::with_capacity(capacity);
        buf.extend_from_slice(first.literal);
        push_char(&mut buf, first_char);

        for item in value {
            let chunk = item?;
            buf.extend_from_slice(chunk.literal);
            if let Some(c) = chunk.unescaped {
                push_char(&mut buf, c);
            }
        }
        Ok(Cow::Owned(buf))
    }
}
864
865/// A wrapper struct for implementing `fmt::Display` on an [`Unescape`] iterator.
866pub struct DisplayUnescape<'a> {
867 inner: Unescape<'a>,
868 lossy: bool,
869}
870
871impl<'a> fmt::Display for DisplayUnescape<'a> {
872 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
873 for chunk_result in self.inner.clone() {
874 match chunk_result {
875 Ok(chunk) => {
876 let display_chunk = DisplayUnescapedChunk {
877 chunk: &chunk,
878 lossy: self.lossy,
879 };
880 write!(f, "{}", display_chunk)?;
881 }
882 Err(_) => return Err(fmt::Error), // Signal error to formatter
883 }
884 }
885 Ok(())
886 }
887}
888
// Unescape table: maps the byte that follows '\' in a two-character escape to
// the character it stands for. Bytes that do not form such an escape map to
// `None`.
const UNESCAPE_TABLE: [Option<char>; 256] = {
    const ESCAPES: [(u8, char); 8] = [
        (b'"', '\"'),
        (b'\\', '\\'),
        (b'/', '/'),
        (b'b', '\x08'),
        (b'f', '\x0C'),
        (b'n', '\n'),
        (b'r', '\r'),
        (b't', '\t'),
    ];

    let mut table: [Option<char>; 256] = [None; 256];
    let mut index = 0;
    while index < ESCAPES.len() {
        let (byte, unescaped) = ESCAPES[index];
        table[byte as usize] = Some(unescaped);
        index += 1;
    }
    table
};
902
// --- Look-Up Table for Hex Decoding ---
// Maps an ASCII byte to the value of the hex digit it represents
// (`0-9`, `a-f`, `A-F`); every other byte maps to `None`.
const HEX: [Option<u8>; 256] = {
    // Value of a single hex digit, or `None` for a non-hex byte.
    const fn digit(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let mut table = [None; 256];
    let mut byte = 0u8;
    loop {
        table[byte as usize] = digit(byte);
        if byte == u8::MAX {
            break;
        }
        byte += 1;
    }
    table
};
918
919//==============================================================================
920// Iterator Trait Implementations
921//==============================================================================
922
#[cfg(feature = "alloc")]
mod iter_traits {
    use super::{EscapedChunk, UnescapedChunk};
    use alloc::string::String;
    use alloc::vec::Vec;

    /// Builds a `String` by concatenating the output of escaped chunks.
    impl<'a> FromIterator<EscapedChunk<'a>> for String {
        #[inline]
        fn from_iter<I: IntoIterator<Item = EscapedChunk<'a>>>(iter: I) -> String {
            let mut out = String::new();
            out.extend(iter);
            out
        }
    }

    /// Appends the output of escaped chunks to an existing `String`.
    impl<'a> Extend<EscapedChunk<'a>> for String {
        #[inline]
        fn extend<I: IntoIterator<Item = EscapedChunk<'a>>>(&mut self, iter: I) {
            for chunk in iter {
                self.push_str(chunk.literal);
                if let Some(sequence) = chunk.escaped {
                    self.push_str(sequence);
                }
            }
        }
    }

    /// Builds a byte vector by concatenating the output of unescaped chunks.
    impl<'a> FromIterator<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn from_iter<I: IntoIterator<Item = UnescapedChunk<'a>>>(iter: I) -> Vec<u8> {
            let mut out = Vec::new();
            out.extend(iter);
            out
        }
    }

    /// Appends the output of unescaped chunks to an existing byte vector.
    impl<'a> Extend<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn extend<I: IntoIterator<Item = UnescapedChunk<'a>>>(&mut self, iter: I) {
            for chunk in iter {
                self.extend_from_slice(chunk.literal);
                if let Some(c) = chunk.unescaped {
                    // Grow by the encoded length, then encode the character
                    // straight into the freshly added tail.
                    let start = self.len();
                    self.resize(start + c.len_utf8(), 0);
                    c.encode_utf8(&mut self[start..]);
                }
            }
        }
    }
}
978
#[cfg(test)]
mod tests {
    use super::*;

    impl<'a> EscapedChunk<'a> {
        /// Creates a new `EscapedChunk` (test-only shorthand).
        const fn new(literal: &'a str, escaped: Option<&'static str>) -> Self {
            Self { literal, escaped }
        }
    }

    impl<'a> UnescapedChunk<'a> {
        /// Creates a new `UnescapedChunk` (test-only shorthand).
        const fn new(literal: &'a [u8], unescaped: Option<char>) -> Self {
            Self { literal, unescaped }
        }
    }

    #[test]
    fn escape_chunks() {
        let mut it = escape_str("a\nb\"c");
        assert_eq!(
            it.next(),
            Some(EscapedChunk::new("a", Some(r#"\n"#))),
            "Chunk 1"
        );
        assert_eq!(
            it.next(),
            Some(EscapedChunk::new("b", Some(r#"\""#))),
            "Chunk 2"
        );
        assert_eq!(it.next(), Some(EscapedChunk::new("c", None)), "Chunk 3");
        assert_eq!(it.next(), None, "End of iterator");
    }

    #[test]
    fn unescape_chunks() {
        let mut it = unescape(br"xy\t\u0020z");
        assert_eq!(
            it.next().unwrap().unwrap(),
            UnescapedChunk::new(b"xy", Some('\t')),
            "Chunk 1"
        );
        assert_eq!(
            it.next().unwrap().unwrap(),
            UnescapedChunk::new(b"", Some(' ')),
            "Chunk 2"
        );
        assert_eq!(
            it.next().unwrap().unwrap(),
            UnescapedChunk::new(b"z", None),
            "Chunk 3"
        );
        assert_eq!(it.next(), None, "End of iterator");
    }

    #[test]
    fn test_escape_against_collected_string() {
        assert_eq!(
            escape_str("Hello, world!").collect::<String>(),
            "Hello, world!"
        );
        assert_eq!(escape_str("a\"b").collect::<String>(), r#"a\"b"#);
        assert_eq!(escape_str("\0").collect::<String>(), r#"\u0000"#);
        assert_eq!(
            escape_str("path/to/file").collect::<String>(),
            r#"path/to/file"#
        );

        // Smoke test: multi-byte characters (accented letters, an emoji and
        // symbols with a variation selector) must be walked without panicking.
        // Written with escapes so the source stays ASCII-only.
        escape_str(
            "Unicode test: \u{e9}\u{e0}\u{e7}\u{fc}\u{f6}. Emoji: \u{1F600}. \
             More symbols: \u{2764}\u{FE0F}\u{2705}.",
        )
        .for_each(|_| {});
    }

    #[test]
    fn test_unescape_against_collected_string() {
        assert_eq!(
            unescape(br"Hello, world!").decode_utf8().unwrap(),
            "Hello, world!"
        );
        assert_eq!(unescape(br"a\nb").decode_utf8().unwrap(), "a\nb");
        // The surrogate pair D83D/DE00 encodes U+1F600.
        assert_eq!(
            unescape(br"\uD83D\uDE00").decode_utf8().unwrap(),
            "\u{1F600}"
        );
    }

    #[test]
    fn unescape_error_propagation() {
        let mut it = unescape(br"valid\k");

        // The iterator bundles a literal with the outcome of the escape that
        // follows it, so the failing escape surfaces as the very first item
        // and the literal "valid" is not yielded on its own.
        let first_chunk = it.next().unwrap();
        assert!(matches!(first_chunk, Err(UnescapeError { .. })));
    }

    // Inspired by and copied from memchr
    #[test]
    fn sync_regression() {
        use core::panic::{RefUnwindSafe, UnwindSafe};

        fn assert_send_sync<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
        assert_send_sync::<Unescape<'_>>();
        assert_send_sync::<Escape<'_>>();

        assert_send_sync::<UnescapedChunk<'_>>();
        assert_send_sync::<EscapedChunk<'_>>();
    }
}
1084}