json_escape/explicit.rs
1//! More explicit and fine-grained iterators for JSON escaping and unescaping.
2//!
3//! This module provides an alternative API to the one in the crate root. While the
4//! root API yields slices (`&str` or `&[u8]`) that represent the final output,
5//! this module's iterators yield "chunk" structs. These structs distinguish between
6//! parts of the input that were processed literally and the specific characters
7//! that were escaped or unescaped.
8//!
9//! This approach offers several advantages:
10//! - **Greater Control**: You can inspect each component of the transformation,
11//! which can be useful for debugging, logging, or more complex data processing.
12//! - **Potential Performance**: By avoiding the need to look up single-byte escape
13//! sequences in a table on every iteration, some workflows may see a minor
14//! performance improvement.
15//! - **Clarity**: The structure of the output more closely reflects the transformation
16//! process, which can make the logic easier to follow.
17//!
18//! # Example: Escaping
19//!
20//! ```
21//! use json_escape::explicit::escape_str;
22//!
23//! let mut escaper = escape_str("a\nb");
24//!
25//! // The first chunk contains the literal "a" and the escaped newline.
26//! let chunk1 = escaper.next().unwrap();
27//! assert_eq!("a", chunk1.literal());
28//! assert_eq!(Some(r#"\n"#), chunk1.escaped());
29//!
30//! // The second chunk contains the literal "b" and no escaped sequence.
31//! let chunk2 = escaper.next().unwrap();
32//! assert_eq!("b", chunk2.literal());
33//! assert_eq!(None, chunk2.escaped());
34//!
35//! // The iterator is now exhausted.
36//! assert!(escaper.next().is_none());
37//! ```
38//!
39//! # Example: Unescaping
40//!
41//! ```
42//! use json_escape::explicit::unescape;
43//!
44//! let mut unescaper = unescape(br"hello\tworld");
45//!
46//! // The first chunk contains the literal "hello" and the unescaped tab.
47//! let chunk1 = unescaper.next().unwrap().unwrap();
48//! assert_eq!(b"hello", chunk1.literal());
49//! assert_eq!(Some('\t'), chunk1.unescaped());
50//!
51//! // The second chunk contains the literal "world" and no unescaped character.
52//! let chunk2 = unescaper.next().unwrap().unwrap();
53//! assert_eq!(b"world", chunk2.literal());
54//! assert_eq!(None, chunk2.unescaped());
55//!
56//! // The iterator is now exhausted.
57//! assert!(unescaper.next().is_none());
58//! ```
59//!
60//! Both `Escape` and `Unescape` iterators provide `display` helpers for easy integration
61//! with Rust's formatting system, preserving the zero-allocation benefits of the main API.
62
63#[cfg(feature = "alloc")]
64use crate::DecodeUtf8Error;
65use crate::token::{EscapeTokens, UnescapeTokens};
66use crate::{UnescapeError, display_bytes_utf8};
67use core::fmt;
68use core::iter::FusedIterator;
69use core::str;
70
71#[cfg(feature = "alloc")]
72use alloc::{borrow::Cow, string::String, vec::Vec};
73
74//==============================================================================
75// Escaping
76//==============================================================================
77
78/// Creates an iterator that yields chunks of an escaped JSON string.
79///
80/// See the [module-level documentation](self) for more details.
81#[inline]
82pub fn escape_str(s: &str) -> Escape<'_> {
83 Escape {
84 bytes: s.as_bytes(),
85 }
86}
87
/// One piece of JSON-escaped output: a literal run plus the escape sequence
/// that follows it, if there is one.
///
/// This struct is yielded by the [`Escape`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EscapedChunk<'a> {
    /// Portion of the original input that needed no escaping.
    literal: &'a str,
    /// Escape sequence (e.g., `r#"\n"#`, `r#"\""#`) placed directly after `literal`.
    /// `None` when this is the last chunk and nothing had to be escaped after it.
    escaped: Option<&'static str>,
}

impl<'a> EscapedChunk<'a> {
    /// Returns the literal text of this chunk, borrowed from the original string.
    #[inline]
    pub const fn literal(&self) -> &'a str {
        self.literal
    }

    /// Returns the escape sequence that follows the literal text, if any.
    #[inline]
    pub const fn escaped(&self) -> Option<&'static str> {
        self.escaped
    }
}

impl fmt::Display for EscapedChunk<'_> {
    /// Writes the literal text, then the escape sequence when one is present.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.literal)?;
        match self.escaped {
            Some(sequence) => f.write_str(sequence),
            None => Ok(()),
        }
    }
}
124
/// Iterator that walks a string and produces [`EscapedChunk`]s.
///
/// Obtained from the [`escape_str`] function.
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone)]
pub struct Escape<'a> {
    /// Input bytes that have not been turned into chunks yet.
    pub(crate) bytes: &'a [u8],
}
133
134impl<'a> Iterator for Escape<'a> {
135 type Item = EscapedChunk<'a>;
136
137 #[inline]
138 fn next(&mut self) -> Option<Self::Item> {
139 if self.bytes.is_empty() {
140 return None;
141 }
142
143 // SAFETY: Input is string
144 let (literal, rest) = unsafe { EscapeTokens::split_at_escape(self.bytes) };
145
146 Some(EscapedChunk {
147 literal,
148 escaped: {
149 if rest.is_empty() {
150 self.bytes = rest;
151 None
152 } else {
153 // An escapable byte is at the beginning of the slice.
154 self.bytes = &rest[1..];
155 Some(
156 EscapeTokens::escape(rest[0])
157 .expect("find_escape_char found a byte not in ESCAPE_TABLE"),
158 )
159 }
160 },
161 })
162 }
163
164 // TODO: size_hint
165}
166
167impl<'a> FusedIterator for Escape<'a> {}
168
169impl<'a> fmt::Display for Escape<'a> {
170 /// This allows the escaped output to be written directly to a formatter
171 /// without intermediate allocation.
172 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
173 for chunk in self.clone() {
174 write!(f, "{chunk}")?;
175 }
176 Ok(())
177 }
178}
179
180impl fmt::Debug for Escape<'_> {
181 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
182 f.debug_struct("Escape").finish_non_exhaustive()
183 }
184}
185
186impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Escape<'_> {
187 /// Compares the escaped output with any byte-slice-like object.
188 ///
189 /// This is a convenience for testing, allowing you to check the fully
190 /// concatenated result of an `Escape` iterator against a known `&str` or `&[u8]`.
191 fn eq(&self, other: &B) -> bool {
192 let mut other = other.as_ref();
193 for chunk in self.clone() {
194 // Check literal part
195 if !other.starts_with(chunk.literal.as_bytes()) {
196 return false;
197 }
198 other = &other[chunk.literal.len()..];
199
200 // Check escaped part
201 if let Some(escaped_str) = chunk.escaped {
202 if !other.starts_with(escaped_str.as_bytes()) {
203 return false;
204 }
205 other = &other[escaped_str.len()..];
206 }
207 }
208 other.is_empty()
209 }
210}
211
212impl<'a, 'b> PartialEq<Escape<'a>> for Escape<'b> {
213 /// Compares two `Escape` iterators for equality.
214 ///
215 /// Two `Escape` iterators are considered equal if they'll produce the same **output**.
216 /// It first performs a fast check on the underlying byte slices.
217 fn eq(&self, other: &Escape<'a>) -> bool {
218 // The crate parallel is easier
219 crate::Escape {
220 inner: EscapeTokens { bytes: self.bytes },
221 } == crate::Escape {
222 inner: EscapeTokens { bytes: other.bytes },
223 }
224 }
225}
226
#[cfg(feature = "alloc")]
impl<'a> From<Escape<'a>> for Cow<'a, str> {
    /// Efficiently collects the escaped parts into a `Cow<'a, str>`.
    ///
    /// This implementation is optimized to avoid allocation if possible:
    /// - If the input string requires **no escaping**, it returns `Cow::Borrowed`
    ///   with a slice of the original string.
    /// - If escaping is needed, it allocates a `String` and returns `Cow::Owned`.
    fn from(mut iter: Escape<'a>) -> Self {
        let first = match iter.next() {
            Some(chunk) => chunk,
            None => return Cow::Borrowed(""),
        };

        let escaped = match first.escaped {
            Some(sequence) => sequence,
            // A chunk without an escape is only produced when nothing follows
            // its literal, so the literal is the whole input: borrow it.
            None => return Cow::Borrowed(first.literal),
        };

        // Escaping occurred, so we must allocate. The remaining input length is
        // used as a starting estimate; the string grows if more escapes follow.
        let mut out =
            String::with_capacity(first.literal.len() + escaped.len() + iter.bytes.len());
        out.push_str(first.literal);
        out.push_str(escaped);
        out.extend(iter);
        Cow::Owned(out)
    }
}
256
257//==============================================================================
258// Unescaping
259//==============================================================================
260
261/// Creates an iterator that yields chunks of an unescaped JSON string.
262///
263/// See the [module-level documentation](self) for more details.
264#[inline]
265pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> Unescape<'_> {
266 Unescape {
267 bytes: input.as_ref(),
268 }
269}
270
271/// Creates a streaming JSON string unescaper that handles enclosing quotes.
272///
273/// This function is a convenience wrapper around [`unescape`]. If the input byte
274/// slice starts and ends with a double-quote (`"`), the quotes are trimmed
275/// before the content is unescaped.
276///
277/// If the input is not enclosed in quotes, this function behaves identically to
278/// [`unescape`].
279///
280/// # Examples
281///
282/// ```
283/// use json_escape::explicit::unescape_quoted;
284///
285/// // An input string with quotes and an escaped tab.
286/// let bytes = br#""\tline""#;
287/// let mut unescaper = unescape_quoted(bytes);
288///
289/// // The first chunk is the unescaped tab character.
290/// let chunk1 = unescaper.next().unwrap().unwrap();
291/// assert_eq!(b"", chunk1.literal());
292/// assert_eq!(Some('\t'), chunk1.unescaped());
293///
294/// // The second chunk is the literal "line".
295/// let chunk2 = unescaper.next().unwrap().unwrap();
296/// assert_eq!(b"line", chunk2.literal());
297/// assert_eq!(None, chunk2.unescaped());
298///
299/// // The iterator is now exhausted.
300/// assert!(unescaper.next().is_none());
301/// ```
302#[inline]
303pub fn unescape_quoted(bytes: &[u8]) -> Unescape<'_> {
304 let inner = if bytes.len() >= 2 && bytes.first() == Some(&b'"') && bytes.last() == Some(&b'"') {
305 &bytes[1..bytes.len() - 1]
306 } else {
307 bytes
308 };
309 unescape(inner)
310}
311
/// A chunk of a JSON-unescaped byte slice, separating the literal part from the unescaped character.
///
/// This struct is yielded by the [`Unescape`] iterator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UnescapedChunk<'a> {
    /// A slice of the original input that did not require unescaping.
    pub(crate) literal: &'a [u8],
    /// The single character decoded from the escape sequence after `literal`.
    /// `None` if this is the last chunk and it has no trailing unescaped character.
    pub(crate) unescaped: Option<char>,
}

impl<'a> UnescapedChunk<'a> {
    /// Returns the literal part of the chunk, which is a slice of the original bytes.
    #[inline]
    pub const fn literal(&self) -> &'a [u8] {
        self.literal
    }

    /// Returns the unescaped character, if any.
    #[inline]
    pub const fn unescaped(&self) -> Option<char> {
        self.unescaped
    }

    /// Returns a displayable wrapper that will format the chunk as a UTF-8 string.
    ///
    /// If the literal part of the chunk contains invalid UTF-8 sequences, this
    /// will result in a `fmt::Error`.
    pub fn display_utf8(&self) -> DisplayUnescapedChunk<'_> {
        DisplayUnescapedChunk {
            chunk: self,
            lossy: false,
        }
    }

    /// Returns a displayable wrapper that will format the chunk as a lossy UTF-8 string.
    ///
    /// Any invalid UTF-8 sequences in the literal part of the chunk will be
    /// replaced with the U+FFFD replacement character.
    pub fn display_utf8_lossy(&self) -> DisplayUnescapedChunk<'_> {
        DisplayUnescapedChunk {
            chunk: self,
            lossy: true,
        }
    }
}

/// Helper struct for safely displaying an [`UnescapedChunk`].
///
/// Created by [`UnescapedChunk::display_utf8`] and
/// [`UnescapedChunk::display_utf8_lossy`].
#[derive(Debug, Clone, Copy)]
pub struct DisplayUnescapedChunk<'a> {
    /// The chunk to format.
    chunk: &'a UnescapedChunk<'a>,
    /// Whether invalid UTF-8 in the literal is replaced instead of reported.
    lossy: bool,
}
365
366impl<'a> fmt::Display for DisplayUnescapedChunk<'a> {
367 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
368 display_bytes_utf8(self.chunk.literal, f, self.lossy)?;
369 if let Some(c) = self.chunk.unescaped {
370 use fmt::Write as _;
371
372 f.write_char(c)?;
373 }
374 Ok(())
375 }
376}
377
/// Iterator that walks a byte slice and produces [`UnescapedChunk`]s.
///
/// Obtained from the [`unescape`] function.
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone)]
pub struct Unescape<'a> {
    /// Input bytes that have not been turned into chunks yet.
    pub(crate) bytes: &'a [u8],
}
386
387impl<'a> Unescape<'a> {
388 /// Decodes the unescaped byte stream into a UTF-8 string.
389 ///
390 /// This method consumes the iterator and collects all resulting byte chunks
391 /// into a `Cow<[u8]>`, which is then validated as UTF-8. If an unescaping
392 /// error occurs, it's returned immediately. If the final sequence of bytes
393 /// is not valid UTF-8, a UTF-8 error is returned.
394 ///
395 /// This is optimized to return a `Cow::Borrowed` if no escapes were present
396 /// in the input, avoiding allocation.
397 ///
398 /// **Requires the `alloc` feature.**
399 ///
400 /// # Example
401 ///
402 /// ```
403 /// # #[cfg(feature = "alloc")] {
404 /// use json_escape::explicit::unescape;
405 ///
406 /// let input = r#"Emoji: \uD83D\uDE00"#;
407 /// let cow = unescape(input).decode_utf8().unwrap();
408 ///
409 /// assert_eq!(cow, "Emoji: 😀");
410 /// # }
411 /// ```
412 #[cfg(feature = "alloc")]
413 pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
414 match self.try_into().map_err(DecodeUtf8Error::Unescape)? {
415 Cow::Borrowed(bytes) => str::from_utf8(bytes)
416 .map(Cow::Borrowed)
417 .map_err(DecodeUtf8Error::Utf8),
418 Cow::Owned(bytes) => String::from_utf8(bytes)
419 .map(Cow::Owned)
420 .map_err(|e| DecodeUtf8Error::Utf8(e.utf8_error())),
421 }
422 }
423
424 /// Decodes the unescaped byte stream lossily into a UTF-8 string.
425 ///
426 /// This is similar to [`Unescape::decode_utf8`] but replaces any invalid UTF-8 sequences
427 /// with the replacement character (`U+FFFD`) instead of returning an error.
428 ///
429 /// An `UnescapeError` can still be returned if the JSON escaping itself is invalid.
430 ///
431 /// **Requires the `alloc` feature.**
432 #[cfg(feature = "alloc")]
433 pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
434 use crate::decode_utf8_lossy;
435
436 Ok(decode_utf8_lossy(self.try_into()?))
437 }
438
439 /// Returns a wrapper that implements [`fmt::Display`].
440 ///
441 /// If an unescaping error or invalid UTF-8 sequence is encountered,
442 /// a `fmt::Error` is returned, which will cause `format!` and friends to panic.
443 pub fn display_utf8(self) -> DisplayUnescape<'a> {
444 DisplayUnescape {
445 inner: self,
446 lossy: false,
447 }
448 }
449
450 /// Returns a wrapper that implements [`fmt::Display` for lossy UTF-8 decoding.
451 ///
452 /// Invalid UTF-8 sequences will be replaced with the replacement character.
453 /// An unescaping error will still result in a `fmt::Error`.
454 pub fn display_utf8_lossy(self) -> DisplayUnescape<'a> {
455 DisplayUnescape {
456 inner: self,
457 lossy: true,
458 }
459 }
460}
461
462impl<'a> Iterator for Unescape<'a> {
463 type Item = Result<UnescapedChunk<'a>, UnescapeError>;
464
465 #[inline]
466 fn next(&mut self) -> Option<Self::Item> {
467 if self.bytes.is_empty() {
468 return None;
469 }
470
471 let (literal, rest) = UnescapeTokens::split_at_escape(self.bytes);
472
473 Some(Ok(UnescapedChunk {
474 literal,
475 unescaped: {
476 if rest.is_empty() {
477 // there's no remainder, we just have a literal.
478 self.bytes = rest;
479 None
480 } else {
481 // rest starts with '\\'
482 let mut remainder = &rest[1..];
483 match UnescapeTokens::handle_escape(&mut remainder) {
484 Ok(unescaped_char) => {
485 self.bytes = remainder;
486 Some(unescaped_char)
487 }
488 Err(err) => return Some(Err(err)),
489 }
490 }
491 },
492 }))
493 }
494
495 // TODO: sizehint
496}
497
498impl<'a> FusedIterator for Unescape<'a> {}
499
500impl fmt::Debug for Unescape<'_> {
501 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
502 f.debug_struct("Unescape").finish_non_exhaustive()
503 }
504}
505
506impl<B: AsRef<[u8]> + ?Sized> PartialEq<B> for Unescape<'_> {
507 /// Compares the unescaped output with a byte-slice-like object.
508 ///
509 /// Returns `true` if the iterator successfully unescapes to produce a byte
510 /// sequence identical to `other`. If an error occurs, returns `false`.
511 fn eq(&self, other: &B) -> bool {
512 let mut other = other.as_ref();
513 let mut char_buf = [0u8; 4];
514
515 for result in self.clone() {
516 match result {
517 Ok(chunk) => {
518 // Check literal part
519 if !other.starts_with(chunk.literal) {
520 return false;
521 }
522 other = &other[chunk.literal.len()..];
523
524 // Check unescaped part
525 if let Some(c) = chunk.unescaped {
526 let char_bytes = c.encode_utf8(&mut char_buf);
527 if !other.starts_with(char_bytes.as_bytes()) {
528 return false;
529 }
530 other = &other[char_bytes.len()..];
531 }
532 }
533 Err(_) => return false, // An erroring iterator cannot be equal.
534 }
535 }
536 other.is_empty()
537 }
538}
539
540impl<B: AsRef<[u8]>> PartialEq<Unescape<'_>> for Result<B, UnescapeError> {
541 /// Compares the unescaper's outcome with a `Result`.
542 ///
543 /// This allows for precise testing of `Unescape` against either a
544 /// successful outcome (`Ok(bytes)`) or a specific failure (`Err(error)`).
545 fn eq(&self, unescape: &Unescape<'_>) -> bool {
546 match self {
547 Ok(expected_bytes) => unescape == expected_bytes,
548 Err(expected_error) => {
549 for result in unescape.clone() {
550 if let Err(actual_error) = result {
551 // The iterator's first error is its final outcome.
552 return actual_error == *expected_error;
553 }
554 }
555 // `unescape` completed successfully, but an error was expected.
556 false
557 }
558 }
559 }
560}
561
562impl<'a, 'b> PartialEq<Unescape<'a>> for Unescape<'b> {
563 /// Compares two `Unescape` iterators for equality based on their terminal result.
564 ///
565 /// The equality of two `Unescape` iterators is determined by the final `Result`
566 /// that would be obtained if each iterator were fully consumed (e.g., by using `try_collect()`).
567 ///
568 /// The specific rules are as follows:
569 ///
570 /// 1. **Error vs. Error**: If both iterators terminate with an `Err`, they are
571 /// considered **equal** if and only if their `UnescapeError`s are identical.
572 /// Any bytes successfully unescaped *before* the error are ignored in this case.
573 /// 2. **Success vs. Success**: If both iterators terminate with `Ok`, they are
574 /// considered **equal** if and only if the complete sequence of unescaped bytes
575 /// is identical for both.
576 /// 3. **Success vs. Error**: If one iterator terminates with `Ok` and the other
577 /// with `Err`, they are always **not equal**.
578 ///
579 /// # Example
580 ///
581 /// ```
582 /// use json_escape::explicit::unescape;
583 ///
584 /// // Case 1: Both iterators produce the same error. They are equal,
585 /// // even though their valid prefixes ("a" and "b") are different.
586 /// let failing_a = unescape(r#"a\k"#);
587 /// let failing_b = unescape(r#"b\k"#);
588 /// assert_eq!(failing_a, failing_b);
589 ///
590 /// // Case 2: Both iterators succeed. Equality depends on the byte stream.
591 /// let successful_a = unescape(r#"hello\nworld"#);
592 /// let successful_b = unescape(r#"hello\nworld"#);
593 /// assert_eq!(successful_a, successful_b);
594 ///
595 /// let successful_c = unescape(r#"different"#);
596 /// assert_ne!(successful_a, successful_c);
597 ///
598 /// // Case 3: One succeeds and one fails. They are not equal.
599 /// let succeeding = unescape(r#"stop"#);
600 /// let failing = unescape(r#"stop\k"#);
601 /// assert_ne!(succeeding, failing);
602 ///
603 /// // Case 4: Both iterators fail differently. They are not equal.
604 /// let failing_a = unescape(r#"data:\k"#);
605 /// let failing_b = unescape(r#"data:\"#);
606 /// assert_ne!(failing_a, failing_b);
607 /// ```
608 fn eq(&self, other: &Unescape<'a>) -> bool {
609 // The crate parallel is easier
610 crate::unescape(self.bytes) == crate::unescape(other.bytes)
611 }
612}
613
#[cfg(feature = "alloc")]
impl<'a> TryFrom<Unescape<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Collects the unescaped bytes into a `Cow<'a, [u8]>`.
    ///
    /// Yields `Cow::Borrowed` when the input contains no escape sequence, so
    /// nothing is allocated in that case; otherwise the output is assembled
    /// in a `Vec` and returned as `Cow::Owned`. The first unescaping error
    /// aborts the conversion and is returned.
    fn try_from(mut value: Unescape<'a>) -> Result<Self, Self::Error> {
        use crate::token::append_char;

        let first = match value.next() {
            None => return Ok(Cow::Borrowed(b"")),
            Some(item) => item?,
        };

        let first_char = match first.unescaped {
            // The first and only chunk has no unescaped part. No allocation needed.
            None => return Ok(Cow::Borrowed(first.literal)),
            Some(c) => c,
        };

        // An escape was processed. Must allocate and collect the rest.
        let mut buf = Vec::with_capacity(value.bytes.len() + 16);
        buf.extend_from_slice(first.literal);
        append_char(&mut buf, first_char);

        for item in value {
            let chunk = item?;
            buf.extend_from_slice(chunk.literal);
            if let Some(c) = chunk.unescaped {
                append_char(&mut buf, c);
            }
        }

        Ok(Cow::Owned(buf))
    }
}
655
656/// A wrapper struct for implementing `fmt::Display` on an [`Unescape`] iterator.
657pub struct DisplayUnescape<'a> {
658 inner: Unescape<'a>,
659 lossy: bool,
660}
661
662impl<'a> fmt::Display for DisplayUnescape<'a> {
663 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
664 for chunk_result in self.inner.clone() {
665 match chunk_result {
666 Ok(chunk) => {
667 let display_chunk = DisplayUnescapedChunk {
668 chunk: &chunk,
669 lossy: self.lossy,
670 };
671 write!(f, "{}", display_chunk)?;
672 }
673 Err(_) => return Err(fmt::Error), // Signal error to formatter
674 }
675 }
676 Ok(())
677 }
678}
679
680//==============================================================================
681// Iterator Trait Implementations
682//==============================================================================
683
#[cfg(feature = "alloc")]
mod iter_traits {
    //! `FromIterator` / `Extend` glue so chunks can be collected into
    //! `String` and `Vec<u8>`.

    use crate::token::append_char;

    use super::{EscapedChunk, UnescapedChunk};
    use alloc::string::String;
    use alloc::vec::Vec;

    /// Builds a `String` by concatenating the escaped chunks of an iterator.
    impl<'a> FromIterator<EscapedChunk<'a>> for String {
        #[inline]
        fn from_iter<I: IntoIterator<Item = EscapedChunk<'a>>>(iter: I) -> String {
            let mut out = String::new();
            <String as Extend<EscapedChunk<'a>>>::extend(&mut out, iter);
            out
        }
    }

    /// Appends each chunk's literal text and escape sequence to a `String`.
    impl<'a> Extend<EscapedChunk<'a>> for String {
        #[inline]
        fn extend<I: IntoIterator<Item = EscapedChunk<'a>>>(&mut self, iter: I) {
            for chunk in iter {
                self.push_str(chunk.literal);
                if let Some(escaped_str) = chunk.escaped {
                    self.push_str(escaped_str);
                }
            }
        }
    }

    /// Builds a byte vector by concatenating the unescaped chunks of an iterator.
    impl<'a> FromIterator<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn from_iter<I: IntoIterator<Item = UnescapedChunk<'a>>>(iter: I) -> Vec<u8> {
            let mut out = Vec::new();
            <Vec<u8> as Extend<UnescapedChunk<'a>>>::extend(&mut out, iter);
            out
        }
    }

    /// Appends each chunk's literal bytes and unescaped character to a byte vector.
    impl<'a> Extend<UnescapedChunk<'a>> for Vec<u8> {
        #[inline]
        fn extend<I: IntoIterator<Item = UnescapedChunk<'a>>>(&mut self, iter: I) {
            for chunk in iter {
                self.extend_from_slice(chunk.literal);
                if let Some(c) = chunk.unescaped {
                    append_char(self, c);
                }
            }
        }
    }
}
738
#[cfg(test)]
mod tests {
    use super::*;

    impl<'a> EscapedChunk<'a> {
        /// Test-only constructor for an `EscapedChunk`.
        const fn new(literal: &'a str, escaped: Option<&'static str>) -> Self {
            Self { literal, escaped }
        }
    }

    impl<'a> UnescapedChunk<'a> {
        /// Test-only constructor for an `UnescapedChunk`.
        const fn new(literal: &'a [u8], unescaped: Option<char>) -> Self {
            Self { literal, unescaped }
        }
    }

    #[test]
    fn escape_chunks() {
        let expected = [
            EscapedChunk::new("a", Some(r#"\n"#)),
            EscapedChunk::new("b", Some(r#"\""#)),
            EscapedChunk::new("c", None),
        ];

        let mut it = escape_str("a\nb\"c");
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(it.next(), Some(*want), "Chunk {}", index + 1);
        }
        assert_eq!(it.next(), None, "End of iterator");
    }

    #[test]
    fn unescape_chunks() {
        let expected = [
            UnescapedChunk::new(b"xy", Some('\t')),
            UnescapedChunk::new(b"", Some(' ')),
            UnescapedChunk::new(b"z", None),
        ];

        let mut it = unescape(br"xy\t\u0020z");
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(it.next().unwrap().unwrap(), *want, "Chunk {}", index + 1);
        }
        assert_eq!(it.next(), None, "End of iterator");
    }

    #[test]
    fn test_escape_against_collected_string() {
        let cases = [
            ("Hello, world!", "Hello, world!"),
            ("a\"b", r#"a\"b"#),
            ("\0", r#"\u0000"#),
            ("path/to/file", r#"path/to/file"#),
        ];
        for &(input, want) in cases.iter() {
            assert_eq!(escape_str(input).collect::<String>(), want);
        }

        // Multi-byte input only has to be iterated without panicking.
        escape_str(r#"Unicode test: éàçüö. Emoji: 😀. More symbols: ❤️✅."#).for_each(|_| {});
    }

    #[test]
    fn test_unescape_against_collected_string() {
        let cases = [
            (&br"Hello, world!"[..], "Hello, world!"),
            (&br"a\nb"[..], "a\nb"),
            (&br"\uD83D\uDE00"[..], "😀"),
        ];
        for &(input, want) in cases.iter() {
            assert_eq!(unescape(input).decode_utf8().unwrap(), want);
        }
    }

    #[test]
    fn unescape_error_propagation() {
        let mut it = unescape(br"valid\k");

        // A chunk bundles the literal with the result of the escape after it,
        // so the literal "valid" is not yielded on its own: the very first
        // item is already the error caused by `\k`.
        let first_chunk = it.next().unwrap();
        assert!(matches!(first_chunk, Err(UnescapeError { .. })));
    }

    // Inspired by and copied from memchr
    #[test]
    fn sync_regression() {
        use core::panic::{RefUnwindSafe, UnwindSafe};

        fn assert_send_sync<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}

        assert_send_sync::<Escape<'_>>();
        assert_send_sync::<Unescape<'_>>();

        assert_send_sync::<EscapedChunk<'_>>();
        assert_send_sync::<UnescapedChunk<'_>>();
    }
}