1#[cfg(feature = "alloc")]
24use crate::DecodeUtf8Error;
25use crate::{
26 InvalidEscapeError, InvalidHexError, LoneSurrogateError, UnescapeError, UnescapeErrorKind,
27};
28use core::{
29 fmt::{self, Write as _},
30 iter::FusedIterator,
31};
32use memchr::memchr;
33
34#[cfg(feature = "alloc")]
35use alloc::{borrow::Cow, string::String, vec::Vec};
36
37#[inline]
45pub fn escape_str(s: &str) -> EscapeTokens<'_> {
46 EscapeTokens {
47 bytes: s.as_bytes(),
48 }
49}
50
/// One piece of output produced while escaping a string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscapedToken<'a> {
    /// A run of input text that is emitted unchanged.
    Literal(&'a str),
    /// The static escape sequence that replaces a single input byte.
    Escaped(&'static str),
}
61
62impl<'a> EscapedToken<'a> {
63 #[inline(always)]
64 pub(crate) fn as_str(&self) -> &'a str {
65 match self {
66 EscapedToken::Literal(s) => s,
67 EscapedToken::Escaped(s) => s,
68 }
69 }
70}
71
72impl fmt::Display for EscapedToken<'_> {
73 #[inline]
74 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75 f.write_str(self.as_str())
76 }
77}
78
/// Lazy iterator over the escape tokens of a string.
///
/// It holds the not-yet-consumed tail of the input as raw bytes and yields
/// one token per call to `next`.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct EscapeTokens<'a> {
    /// Remaining input. The constructors in this file always build it from a `&str`.
    pub(crate) bytes: &'a [u8],
}
89
90impl<'a> EscapeTokens<'a> {
91 #[inline]
93 pub const fn new(s: &'a str) -> Self {
94 Self {
95 bytes: s.as_bytes(),
96 }
97 }
98
99 #[inline(always)]
100 pub(crate) fn escape(byte: u8) -> Option<&'static str> {
101 ESCAPE_TABLE[byte as usize]
102 }
103
104 #[inline(always)]
117 pub(crate) unsafe fn split_at_escape(bytes: &[u8]) -> (&str, &[u8]) {
118 let pos = match Self::find_escape_char(bytes) {
120 Some(p) => p,
122 None => bytes.len(),
124 };
125
126 let (literal_bytes, rest) = bytes.split_at(pos);
127 (
130 unsafe { std::str::from_utf8_unchecked(literal_bytes) },
131 rest,
132 )
133 }
134
135 #[doc(hidden)]
137 #[cfg(all(feature = "simd", nightly))]
139 #[inline]
140 pub fn find_escape_char(bytes: &[u8]) -> Option<usize> {
141 use std::simd::{Simd, prelude::SimdPartialEq, prelude::SimdPartialOrd};
142
143 const LANES: usize = 16; let mut i = 0;
145
146 while i + LANES <= bytes.len() {
148 let chunk = Simd::<u8, LANES>::from_slice(&bytes[i..]);
150
151 let space_v = Simd::splat(b' ' - 1); let quote_v = Simd::splat(b'"');
154 let slash_v = Simd::splat(b'\\');
155
156 let lt_space_mask = chunk.simd_le(space_v);
158 let eq_quote_mask = chunk.simd_eq(quote_v);
159 let eq_slash_mask = chunk.simd_eq(slash_v);
160
161 let combined_mask = lt_space_mask | eq_quote_mask | eq_slash_mask;
163
164 if combined_mask.any() {
166 let first_match_index = combined_mask.to_bitmask().trailing_zeros() as usize;
169 return Some(i + first_match_index);
170 }
171
172 i += LANES;
173 }
174
175 if i < bytes.len() {
177 if let Some(pos) = bytes[i..]
178 .iter()
179 .position(|&b| ESCAPE_DECISION_TABLE[b as usize] != 0)
180 {
181 return Some(i + pos);
182 }
183 }
184
185 None
186 }
187
188 #[doc(hidden)]
190 #[cfg(all(feature = "simd", not(nightly), target_arch = "x86_64"))]
191 #[inline]
192 pub fn find_escape_char(bytes: &[u8]) -> Option<usize> {
193 use std::arch::x86_64::*;
196
197 let mut i = 0;
198 const LANES: usize = 16; #[target_feature(enable = "sse2")]
206 unsafe fn find_in_chunk(bytes: &[u8], i: usize) -> Option<usize> {
207 debug_assert!(
210 i + LANES <= bytes.len(),
211 "find_in_chunk: attempted to load past end of slice"
212 );
213
214 let chunk = unsafe { _mm_loadu_si128(bytes.as_ptr().add(i) as *const _) };
217
218 let quote_v = _mm_set1_epi8(b'"' as i8);
220 let slash_v = _mm_set1_epi8(b'\\' as i8);
221
222 let bias = _mm_set1_epi8(0x80u8 as i8);
225 let space_v = _mm_set1_epi8(b' ' as i8);
227
228 let biased_chunk = _mm_xor_si128(chunk, bias);
230 let biased_space_v = _mm_xor_si128(space_v, bias);
231
232 let lt_space_mask = _mm_cmplt_epi8(biased_chunk, biased_space_v);
235
236 let eq_quote_mask = _mm_cmpeq_epi8(chunk, quote_v);
238 let eq_slash_mask = _mm_cmpeq_epi8(chunk, slash_v);
239
240 let combined_mask =
242 _mm_or_si128(lt_space_mask, _mm_or_si128(eq_quote_mask, eq_slash_mask));
243
244 let mask = _mm_movemask_epi8(combined_mask);
246
247 if mask != 0 {
248 Some(i + mask.trailing_zeros() as usize)
249 } else {
250 None
251 }
252 }
253
254 if cfg!(target_feature = "sse2") {
255 while i + LANES <= bytes.len() {
257 if let Some(result) = unsafe { find_in_chunk(bytes, i) } {
261 return Some(result);
262 }
263 i += LANES;
264 }
265 } else {
266 }
269
270 if i < bytes.len() {
272 if let Some(pos) = bytes[i..]
273 .iter()
274 .position(|&b| ESCAPE_DECISION_TABLE[b as usize] != 0)
275 {
276 return Some(i + pos);
277 }
278 }
279
280 None
281 }
282
283 #[doc(hidden)]
286 #[cfg(not(feature = "simd"))]
287 #[inline]
288 pub fn find_escape_char(bytes: &[u8]) -> Option<usize> {
289 use core::mem::size_of;
290
291 const WORD_SIZE: usize = size_of::<usize>();
292 const THRESH: u8 = 0x20; const fn repeat(b: u8) -> usize {
296 let mut m: usize = 0;
297 let mut i = 0;
298 while i < WORD_SIZE {
299 m = (m << 8) | (b as usize);
300 i += 1;
301 }
302 m
303 }
304
305 const ONE_MASK: usize = repeat(0x01);
307 const MSB_MASK: usize = repeat(0x80);
308 const QUOTE_MASK: usize = repeat(b'"');
309 const SLASH_MASK: usize = repeat(b'\\');
310 const THR_MASK: usize = repeat(THRESH);
311
312 let mut i = 0usize;
313 while i + WORD_SIZE <= bytes.len() {
314 let word = unsafe { (bytes.as_ptr().add(i) as *const usize).read_unaligned() };
316
317 let xq = word ^ QUOTE_MASK;
319 let quote_bits = (xq.wrapping_sub(ONE_MASK) & !xq) & MSB_MASK;
320
321 let xs = word ^ SLASH_MASK;
322 let slash_bits = (xs.wrapping_sub(ONE_MASK) & !xs) & MSB_MASK;
323
324 let control_bits = (word.wrapping_sub(THR_MASK) & !word) & MSB_MASK;
327
328 let combined = quote_bits | slash_bits | control_bits;
330
331 if combined != 0 {
332 let byte_index = if cfg!(target_endian = "little") {
336 (combined.trailing_zeros() as usize) / 8
337 } else {
338 (combined.leading_zeros() as usize) / 8
339 };
340 return Some(i + byte_index);
341 }
342
343 i += WORD_SIZE;
344 }
345
346 if i < bytes.len() {
348 if let Some(pos) = bytes[i..]
349 .iter()
350 .position(|&b| ESCAPE_DECISION_TABLE[b as usize] != 0)
351 {
352 return Some(i + pos);
353 }
354 }
355
356 None
357 }
358
359 #[cfg(all(feature = "simd", not(nightly), not(target_arch = "x86_64")))]
360 compile_error! { "simd requires nightly or target_arch = \"x86_64\"" }
361}
362
363impl<'a> Iterator for EscapeTokens<'a> {
364 type Item = EscapedToken<'a>;
365
366 #[inline]
367 fn next(&mut self) -> Option<Self::Item> {
368 if self.bytes.is_empty() {
369 return None;
370 }
371
372 if let Some(escaped) = Self::escape(self.bytes[0]) {
373 self.bytes = &self.bytes[1..];
376 Some(EscapedToken::Escaped(escaped))
377 } else {
378 let (literal, rest) = unsafe { Self::split_at_escape(self.bytes) };
381 self.bytes = rest;
382 Some(EscapedToken::Literal(literal))
383 }
384 }
385
386 fn size_hint(&self) -> (usize, Option<usize>) {
387 if self.bytes.is_empty() {
388 (0, Some(0))
389 } else {
390 (1, Some(self.bytes.len()))
392 }
393 }
394}
395
396impl<'a> FusedIterator for EscapeTokens<'a> {}
397
398impl fmt::Display for EscapeTokens<'_> {
399 #[inline]
400 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
401 for token in self.clone() {
402 f.write_str(token.as_str())?;
403 }
404 Ok(())
405 }
406}
407
#[cfg(feature = "alloc")]
impl<'a> From<EscapeTokens<'a>> for Cow<'a, str> {
    /// Collects the escaped output, borrowing from the input when nothing had
    /// to be escaped and allocating otherwise.
    fn from(mut iter: EscapeTokens<'a>) -> Self {
        let Some(first) = iter.next() else {
            // Empty input.
            return Cow::Borrowed("");
        };

        // A single literal that consumed everything means no escaping was needed.
        if let EscapedToken::Literal(whole) = first {
            if iter.bytes.is_empty() {
                return Cow::Borrowed(whole);
            }
        }

        // Escaping only ever grows the text, so the remaining byte count is a
        // reasonable starting capacity.
        let head = first.as_str();
        let mut escaped = String::with_capacity(head.len() + iter.bytes.len());
        escaped.push_str(head);
        escaped.extend(iter);
        Cow::Owned(escaped)
    }
}
433
434#[inline]
442pub fn unescape<I: AsRef<[u8]> + ?Sized>(input: &I) -> UnescapeTokens<'_> {
443 UnescapeTokens {
444 bytes: input.as_ref(),
445 }
446}
447
/// One piece of output produced while unescaping.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnescapedToken<'a> {
    /// A run of input bytes containing no backslash, passed through unchanged.
    Literal(&'a [u8]),
    /// The character decoded from a single escape sequence.
    Unescaped(char),
}
458
459impl UnescapedToken<'_> {
460 pub fn display_utf8(&self) -> DisplayUnescapedToken<'_> {
465 DisplayUnescapedToken {
466 token: self,
467 lossy: true,
468 }
469 }
470
471 pub fn display_utf8_lossy(&self) -> DisplayUnescapedToken<'_> {
476 DisplayUnescapedToken {
477 token: self,
478 lossy: true,
479 }
480 }
481
482 #[inline(always)]
483 const fn len(&self) -> usize {
484 match self {
485 UnescapedToken::Literal(literal) => literal.len(),
486 UnescapedToken::Unescaped(ch) => ch.len_utf8(),
487 }
488 }
489}
490
491pub struct DisplayUnescapedToken<'a> {
493 token: &'a UnescapedToken<'a>,
494 lossy: bool,
495}
496
497impl fmt::Display for DisplayUnescapedToken<'_> {
498 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
499 match self.token {
500 UnescapedToken::Literal(bytes) => crate::display_bytes_utf8(bytes, f, self.lossy),
501 UnescapedToken::Unescaped(c) => f.write_char(*c),
502 }
503 }
504}
505
/// Lazy iterator that unescapes a byte slice one token at a time.
///
/// Each item is either a decoded token or the error describing why the escape
/// at the current position could not be decoded.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct UnescapeTokens<'a> {
    /// Input that has not been consumed yet.
    bytes: &'a [u8],
}
521
522impl<'a> UnescapeTokens<'a> {
523 #[inline]
525 pub const fn new(bytes: &'a [u8]) -> Self {
526 Self { bytes }
527 }
528
529 #[inline]
535 pub const fn remnant(&self) -> &'a [u8] {
536 self.bytes
537 }
538
539 #[cfg(feature = "alloc")]
551 pub fn decode_utf8(self) -> Result<Cow<'a, str>, DecodeUtf8Error> {
552 match self.try_into().map_err(DecodeUtf8Error::Unescape)? {
553 Cow::Borrowed(bytes) => str::from_utf8(bytes)
554 .map(Cow::Borrowed)
555 .map_err(DecodeUtf8Error::Utf8),
556 Cow::Owned(bytes) => String::from_utf8(bytes)
557 .map(Cow::Owned)
558 .map_err(|e| DecodeUtf8Error::Utf8(e.utf8_error())),
559 }
560 }
561
562 #[cfg(feature = "alloc")]
571 pub fn decode_utf8_lossy(self) -> Result<Cow<'a, str>, UnescapeError> {
572 use crate::decode_utf8_lossy;
573
574 Ok(decode_utf8_lossy(self.try_into()?))
575 }
576
577 pub fn display_utf8(self) -> DisplayUnescapeTokens<'a> {
582 DisplayUnescapeTokens {
583 inner: self,
584 lossy: false,
585 }
586 }
587
588 pub fn display_utf8_lossy(self) -> DisplayUnescapeTokens<'a> {
593 DisplayUnescapeTokens {
594 inner: self,
595 lossy: true,
596 }
597 }
598
599 #[inline(always)]
605 pub(crate) fn split_at_escape(bytes: &'a [u8]) -> (&'a [u8], &'a [u8]) {
606 let pos = match memchr(b'\\', bytes) {
607 Some(p) => p,
609 None => bytes.len(),
611 };
612
613 let (literal, rest) = bytes.split_at(pos);
614 (literal, rest)
615 }
616
617 #[inline(always)]
622 pub(crate) fn handle_escape(bytes: &mut &'a [u8]) -> Result<char, UnescapeError> {
623 match bytes.first() {
624 Some(b'u') => {
625 *bytes = &bytes[1..];
627 Self::handle_unicode_escape(bytes)
628 }
629 Some(&byte) => {
630 match UNESCAPE_TABLE[byte as usize] {
632 Some(c) => {
633 *bytes = &bytes[1..];
634 Ok(c)
635 }
636 None => {
637 Err(UnescapeError {
638 kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError {
639 found: byte,
640 }),
641 offset: 1,
643 })
644 }
645 }
646 }
647 None => {
648 Err(UnescapeError {
650 kind: UnescapeErrorKind::UnexpectedEof,
651 offset: 1,
653 })
654 }
655 }
656 }
657
658 #[inline(always)]
664 fn handle_unicode_escape(bytes: &mut &'a [u8]) -> Result<char, UnescapeError> {
665 let first = Self::parse_hex4(bytes, 2)?;
669 *bytes = &bytes[4..];
670
671 if (0xD800..=0xDBFF).contains(&first) {
673 #[allow(clippy::get_first)]
676 match (bytes.get(0), bytes.get(1)) {
677 (Some(b'\\'), Some(b'u')) => {
678 match Self::parse_hex4(&bytes[2..], 8) {
682 Ok(low) if (0xDC00..=0xDFFF).contains(&low) => {
683 let high_t = first as u32;
685 let low_t = low as u32;
686 let code = 0x10000 + (((high_t - 0xD800) << 10) | (low_t - 0xDC00));
687 let result_char = char::from_u32(code).expect(
688 "valid surrogate pair math should always produce a valid char",
689 );
690
691 *bytes = &bytes[6..];
693 return Ok(result_char);
694 }
695 Ok(_) => {
696 return Err(UnescapeError {
699 kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError {
700 surrogate: first,
701 }),
702 offset: 6,
703 });
704 }
705 Err(err) => {
706 return Err(err);
709 }
710 }
711 }
712 (Some(b'\\'), None) => {
713 return Err(UnescapeError {
714 kind: UnescapeErrorKind::UnexpectedEof,
715 offset: 7,
716 });
717 }
718 (None, None) => {
719 return Err(UnescapeError {
721 kind: UnescapeErrorKind::UnexpectedEof,
722 offset: 6,
723 });
724 }
725 _ => {
727 return Err(UnescapeError {
729 kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError {
730 surrogate: first,
731 }),
732 offset: 6,
733 });
734 }
735 }
736 }
737
738 match char::from_u32(first as u32) {
740 Some(c) => Ok(c),
741 None => {
742 Err(UnescapeError {
744 kind: UnescapeErrorKind::LoneSurrogate(LoneSurrogateError { surrogate: first }),
745 offset: 6,
747 })
748 }
749 }
750 }
751
752 #[inline(always)]
754 fn parse_hex4(slice: &[u8], base_offset: u8) -> Result<u16, UnescapeError> {
755 if let Some(chunk) = slice.get(..4) {
758 let b0 = chunk[0];
764 let b1 = chunk[1];
765 let b2 = chunk[2];
766 let b3 = chunk[3];
767
768 if let (Some(v0), Some(v1), Some(v2), Some(v3)) = (
770 HEX[b0 as usize],
771 HEX[b1 as usize],
772 HEX[b2 as usize],
773 HEX[b3 as usize],
774 ) {
775 let result = (v0 as u16) << 12 | (v1 as u16) << 8 | (v2 as u16) << 4 | (v3 as u16);
777 return Ok(result);
778 }
779
780 }
784
785 #[cold]
789 fn handle_error(slice: &[u8], base_offset: u8) -> UnescapeError {
790 for (i, &b) in slice.iter().enumerate() {
792 if HEX[b as usize].is_none() {
793 return UnescapeError {
795 kind: UnescapeErrorKind::InvalidHex(InvalidHexError { found: b }),
796 offset: base_offset + i as u8,
797 };
798 }
799 }
800
801 UnescapeError {
804 kind: UnescapeErrorKind::UnexpectedEof,
805 offset: base_offset + slice.len() as u8,
807 }
808 }
809
810 Err(handle_error(slice, base_offset))
811 }
812}
813
814impl<'a> Iterator for UnescapeTokens<'a> {
815 type Item = Result<UnescapedToken<'a>, UnescapeError>;
816
817 #[inline]
818 fn next(&mut self) -> Option<Self::Item> {
819 if self.bytes.is_empty() {
820 return None;
821 }
822
823 if self.bytes[0] == b'\\' {
825 Some({
827 let mut remainder = &self.bytes[1..];
830 match UnescapeTokens::handle_escape(&mut remainder) {
831 Ok(unescaped_char) => {
832 self.bytes = remainder;
833 Ok(UnescapedToken::Unescaped(unescaped_char))
834 }
835 Err(err) => Err(err),
836 }
837 })
838 } else {
839 let (literal, rest) = Self::split_at_escape(self.bytes);
841 self.bytes = rest;
842 Some(Ok(UnescapedToken::Literal(literal)))
843 }
844 }
845
846 fn size_hint(&self) -> (usize, Option<usize>) {
847 if self.bytes.is_empty() {
848 (0, Some(0))
849 } else {
850 (
852 self.bytes.len().saturating_add(1) / 6,
853 Some(self.bytes.len()),
854 )
855 }
856 }
857}
858
859impl<'a> FusedIterator for UnescapeTokens<'a> {}
860
#[cfg(feature = "alloc")]
impl<'a> TryFrom<UnescapeTokens<'a>> for Cow<'a, [u8]> {
    type Error = UnescapeError;

    /// Unescapes the whole input into bytes.
    ///
    /// Borrows from the input when it contains no escapes and allocates
    /// otherwise. Fails with the first unescape error encountered.
    fn try_from(mut value: UnescapeTokens<'a>) -> Result<Self, Self::Error> {
        /// Appends the bytes represented by `token` to `buf`.
        fn push_token(buf: &mut Vec<u8>, token: UnescapedToken<'_>) {
            match token {
                UnescapedToken::Literal(bytes) => buf.extend_from_slice(bytes),
                UnescapedToken::Unescaped(c) => append_char(buf, c),
            }
        }

        let first = match value.next() {
            None => return Ok(Cow::Borrowed(b"")),
            Some(item) => item?,
        };

        // A single literal that consumed everything means there were no escapes.
        if let UnescapedToken::Literal(whole) = first {
            if value.bytes.is_empty() {
                return Ok(Cow::Borrowed(whole));
            }
        }

        // The first token's size plus the unread input is the starting capacity.
        let mut buf = Vec::with_capacity(first.len() + value.bytes.len());
        push_token(&mut buf, first);
        for item in value {
            push_token(&mut buf, item?);
        }

        Ok(Cow::Owned(buf))
    }
}
899
900pub struct DisplayUnescapeTokens<'a> {
902 inner: UnescapeTokens<'a>,
903 lossy: bool,
904}
905
906impl<'a> fmt::Display for DisplayUnescapeTokens<'a> {
907 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
908 for chunk_result in self.inner.clone() {
909 match chunk_result {
910 Ok(token) => {
911 let display_chunk = DisplayUnescapedToken {
912 token: &token,
913 lossy: self.lossy,
914 };
915 write!(f, "{}", display_chunk)?;
916 }
917 Err(_) => return Err(fmt::Error), }
919 }
920 Ok(())
921 }
922}
923
/// Escape sequence for every byte value; `None` means the byte is emitted as-is.
///
/// Covers all bytes below `0x20` (short forms where they exist, `\u00XX`
/// otherwise) plus `"` and `\`. Entries are listed in byte order.
const ESCAPE_TABLE: [Option<&'static str>; 256] = {
    let mut table: [Option<&'static str>; 256] = [None; 256];

    table[0x00] = Some(r#"\u0000"#);
    table[0x01] = Some(r#"\u0001"#);
    table[0x02] = Some(r#"\u0002"#);
    table[0x03] = Some(r#"\u0003"#);
    table[0x04] = Some(r#"\u0004"#);
    table[0x05] = Some(r#"\u0005"#);
    table[0x06] = Some(r#"\u0006"#);
    table[0x07] = Some(r#"\u0007"#);
    table[0x08] = Some(r#"\b"#);
    table[0x09] = Some(r#"\t"#);
    table[0x0A] = Some(r#"\n"#);
    table[0x0B] = Some(r#"\u000b"#);
    table[0x0C] = Some(r#"\f"#);
    table[0x0D] = Some(r#"\r"#);
    table[0x0E] = Some(r#"\u000e"#);
    table[0x0F] = Some(r#"\u000f"#);
    table[0x10] = Some(r#"\u0010"#);
    table[0x11] = Some(r#"\u0011"#);
    table[0x12] = Some(r#"\u0012"#);
    table[0x13] = Some(r#"\u0013"#);
    table[0x14] = Some(r#"\u0014"#);
    table[0x15] = Some(r#"\u0015"#);
    table[0x16] = Some(r#"\u0016"#);
    table[0x17] = Some(r#"\u0017"#);
    table[0x18] = Some(r#"\u0018"#);
    table[0x19] = Some(r#"\u0019"#);
    table[0x1A] = Some(r#"\u001a"#);
    table[0x1B] = Some(r#"\u001b"#);
    table[0x1C] = Some(r#"\u001c"#);
    table[0x1D] = Some(r#"\u001d"#);
    table[0x1E] = Some(r#"\u001e"#);
    table[0x1F] = Some(r#"\u001f"#);

    table[b'"' as usize] = Some(r#"\""#);
    table[b'\\' as usize] = Some(r#"\\"#);

    table
};
978
979#[doc(hidden)]
981#[allow(unused)]
985pub const ESCAPE_DECISION_TABLE: [u8; 256] = {
986 let mut table = [0u8; 256];
987 let mut i = 0;
988 while i < 256 {
989 if ESCAPE_TABLE[i].is_some() {
990 table[i] = 1;
991 }
992 i += 1;
993 }
994 table
995};
996
/// Maps the byte that follows a backslash to the character it stands for.
///
/// `None` marks an unrecognised selector. `u` is intentionally absent: it is
/// handled separately because it is followed by hex digits.
const UNESCAPE_TABLE: [Option<char>; 256] = {
    let mut table: [Option<char>; 256] = [None; 256];

    // Selectors that stand for themselves.
    table[b'"' as usize] = Some('"');
    table[b'/' as usize] = Some('/');
    table[b'\\' as usize] = Some('\\');

    // Selectors that stand for a control character.
    table[b'b' as usize] = Some('\u{08}');
    table[b'f' as usize] = Some('\u{0C}');
    table[b'n' as usize] = Some('\n');
    table[b'r' as usize] = Some('\r');
    table[b't' as usize] = Some('\t');

    table
};
1010
/// Value of each ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`), indexed by byte;
/// `None` for every byte that is not a hex digit.
const HEX: [Option<u8>; 256] = {
    let mut table = [None; 256];

    // '0'..='9' map to 0..=9.
    let mut digit = 0u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = Some(digit);
        digit += 1;
    }

    // 'a'..='f' and 'A'..='F' both map to 10..=15.
    let mut letter = 0u8;
    while letter < 6 {
        table[(b'a' + letter) as usize] = Some(10 + letter);
        table[(b'A' + letter) as usize] = Some(10 + letter);
        letter += 1;
    }

    table
};
1026
/// Appends the UTF-8 encoding of `c` to `buf`.
#[inline]
pub(crate) fn append_char(buf: &mut Vec<u8>, c: char) {
    // A `char` encodes to at most four bytes, so a stack scratch buffer suffices.
    let mut scratch = [0u8; 4];
    let encoded = c.encode_utf8(&mut scratch);
    buf.extend_from_slice(encoded.as_bytes());
}
1037
/// Collection glue so token iterators can be collected into, or appended to,
/// `String` and `Vec<u8>`.
#[cfg(feature = "alloc")]
mod iter_traits {
    use super::{EscapedToken, UnescapedToken, append_char};
    use alloc::string::String;
    use alloc::vec::Vec;

    /// Concatenates the text of every escaped token into a new `String`.
    impl<'a> FromIterator<EscapedToken<'a>> for String {
        #[inline]
        fn from_iter<I: IntoIterator<Item = EscapedToken<'a>>>(iter: I) -> String {
            let mut out = String::new();
            out.extend(iter);
            out
        }
    }

    /// Appends the text of every escaped token to an existing `String`.
    impl<'a> Extend<EscapedToken<'a>> for String {
        #[inline]
        fn extend<I: IntoIterator<Item = EscapedToken<'a>>>(&mut self, iter: I) {
            for token in iter {
                self.push_str(token.as_str());
            }
        }
    }

    /// Concatenates the bytes of every unescaped token into a new `Vec<u8>`.
    impl<'a> FromIterator<UnescapedToken<'a>> for Vec<u8> {
        #[inline]
        fn from_iter<I: IntoIterator<Item = UnescapedToken<'a>>>(iter: I) -> Vec<u8> {
            let mut out = Vec::new();
            out.extend(iter);
            out
        }
    }

    /// Appends the bytes of every unescaped token to an existing `Vec<u8>`;
    /// decoded characters are written as UTF-8.
    impl<'a> Extend<UnescapedToken<'a>> for Vec<u8> {
        #[inline]
        fn extend<I: IntoIterator<Item = UnescapedToken<'a>>>(&mut self, iter: I) {
            for token in iter {
                match token {
                    UnescapedToken::Literal(literal) => self.extend_from_slice(literal),
                    UnescapedToken::Unescaped(ch) => append_char(self, ch),
                }
            }
        }
    }
}
1089
#[cfg(test)]
mod tests {
    use super::*;

    /// What one call to `UnescapeTokens::next` returns.
    type Step<'a> = Option<Result<UnescapedToken<'a>, UnescapeError>>;

    /// Expected step for a literal run.
    fn literal(bytes: &[u8]) -> Step<'_> {
        Some(Ok(UnescapedToken::Literal(bytes)))
    }

    /// Expected step for a decoded escape.
    fn unescaped(c: char) -> Step<'static> {
        Some(Ok(UnescapedToken::Unescaped(c)))
    }

    /// Drives a fresh iterator over `input` and checks each step in order.
    fn assert_steps(input: &[u8], expected: &[Step<'_>]) {
        let mut iter = UnescapeTokens::new(input);
        for want in expected {
            assert_eq!(&iter.next(), want);
        }
    }

    #[test]
    fn test_empty_string() {
        assert_steps(b"", &[None]);
    }

    #[test]
    fn test_pure_literal() {
        assert_steps(b"hello world", &[literal(b"hello world"), None]);
    }

    #[test]
    fn test_simple_escapes() {
        assert_steps(
            b"a\\nb\\tc",
            &[
                literal(b"a"),
                unescaped('\n'),
                literal(b"b"),
                unescaped('\t'),
                literal(b"c"),
                None,
            ],
        );
    }

    #[test]
    fn test_starts_with_escape() {
        assert_steps(b"\\nhello", &[unescaped('\n'), literal(b"hello"), None]);
    }

    #[test]
    fn test_ends_with_escape() {
        assert_steps(b"hello\\n", &[literal(b"hello"), unescaped('\n'), None]);
    }

    #[test]
    fn test_unicode_and_surrogate() {
        assert_steps(
            b"A is \\u0041, smiley is \\uD83D\\uDE00!",
            &[
                literal(b"A is "),
                unescaped('A'),
                literal(b", smiley is "),
                unescaped('😀'),
                literal(b"!"),
                None,
            ],
        );
    }

    #[test]
    fn test_invalid_escape_yields_literal_first() {
        let mut iter = UnescapeTokens::new(b"ValidPart\\zInvalid");
        assert_eq!(iter.next(), literal(b"ValidPart"));

        let expected = UnescapeError {
            kind: UnescapeErrorKind::InvalidEscape(InvalidEscapeError { found: b'z' }),
            offset: 1,
        };
        let err = iter.next().unwrap().unwrap_err();
        assert_eq!(err, expected);

        // The iterator stays parked on the offending backslash.
        assert_eq!(iter.remnant(), b"\\zInvalid");
        assert_eq!(iter.next(), Some(Err(err)));
    }

    #[test]
    fn test_sticky_error_behavior() {
        let mut iter = UnescapeTokens::new(b"a\\zb");
        assert_eq!(iter.next(), literal(b"a"));

        let err1 = iter.next().unwrap().unwrap_err();
        let expected_kind = UnescapeErrorKind::InvalidEscape(InvalidEscapeError { found: b'z' });
        assert_eq!(err1.kind, expected_kind);
        assert_eq!(iter.remnant(), b"\\zb");

        // Asking again reports the same error without moving.
        let err2 = iter.next().unwrap().unwrap_err();
        assert_eq!(err1, err2);
        assert_eq!(iter.remnant(), b"\\zb");
    }

    #[test]
    fn test_incomplete_escape_at_end() {
        let mut iter = UnescapeTokens::new(b"ValidPart\\u12");
        assert_eq!(iter.next(), literal(b"ValidPart"));
        assert_eq!(iter.remnant(), b"\\u12");

        let expected = UnescapeError {
            kind: UnescapeErrorKind::UnexpectedEof,
            offset: 4,
        };
        let err = iter.next().unwrap().unwrap_err();
        assert_eq!(err, expected);

        assert_eq!(iter.remnant(), b"\\u12");
        assert_eq!(iter.next(), Some(Err(err)));
    }

    #[test]
    fn test_dangling_backslash() {
        let mut iter = UnescapeTokens::new(b"end with \\");
        assert_eq!(iter.next(), literal(b"end with "));

        let expected = UnescapeError {
            kind: UnescapeErrorKind::UnexpectedEof,
            offset: 1,
        };
        let err = iter.next().unwrap().unwrap_err();
        assert_eq!(err, expected);
        assert_eq!(iter.next(), Some(Err(err)));
    }

    #[test]
    fn test_display_unescape_tokens() {
        let rendered = alloc::format!(
            "{}",
            UnescapeTokens::new(b"hello \\u0041\\nworld").display_utf8()
        );
        assert_eq!(rendered, "hello A\nworld");
    }

    #[test]
    fn test_display_unescape_error() {
        let display = UnescapeTokens::new(b"hello\\z").display_utf8_lossy();
        let mut out = String::new();
        let outcome = write!(out, "{}", display);
        assert!(outcome.is_err());
        // Everything before the bad escape has already been written.
        assert!(out.starts_with("hello"));
    }

    #[test]
    fn test_escape_no_escapes() {
        let mut iter = EscapeTokens::new("hello world");
        let expected = [Some(EscapedToken::Literal("hello world")), None];
        for want in expected {
            assert_eq!(iter.next(), want);
        }
    }

    #[test]
    fn test_escape_simple() {
        let mut iter = EscapeTokens::new("hello\nworld");
        let expected = [
            Some(EscapedToken::Literal("hello")),
            Some(EscapedToken::Escaped(r#"\n"#)),
            Some(EscapedToken::Literal("world")),
            None,
        ];
        for want in expected {
            assert_eq!(iter.next(), want);
        }
    }

    #[test]
    fn test_display_escape_tokens() {
        let rendered = alloc::format!("{}", EscapeTokens::new("a\"b\tc"));
        assert_eq!(rendered, r#"a\"b\tc"#);
    }

    #[cfg(feature = "alloc")]
    #[test]
    fn test_escape_to_cow_borrowed() {
        let cow = Cow::<str>::from(EscapeTokens::new("no escapes here"));
        assert!(matches!(cow, Cow::Borrowed(_)));
        assert_eq!(cow, "no escapes here");
    }

    #[cfg(feature = "alloc")]
    #[test]
    fn test_escape_to_cow_owned() {
        let cow = Cow::<str>::from(EscapeTokens::new("has\n an escape"));
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow, r#"has\n an escape"#);
    }

    #[cfg(feature = "alloc")]
    #[test]
    fn test_unescape_to_cow_borrowed() {
        let cow = Cow::<[u8]>::try_from(UnescapeTokens::new(b"no escapes here")).unwrap();
        assert!(matches!(cow, Cow::Borrowed(_)));
        assert_eq!(*cow, *b"no escapes here");
    }

    #[cfg(feature = "alloc")]
    #[test]
    fn test_unescape_to_cow_owned() {
        let cow = Cow::<[u8]>::try_from(UnescapeTokens::new(b"has\\n an escape")).unwrap();
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(*cow, *b"has\n an escape");
    }
}
#[cfg(test)]
mod find_escape_char_tests {
    use std::format;

    use super::{ESCAPE_DECISION_TABLE, EscapeTokens};

    /// Runs `find_escape_char` on `input` and checks the reported index.
    fn run_test(input: &str, expected: Option<usize>, case_name: &str) {
        let actual = EscapeTokens::find_escape_char(input.as_bytes());
        assert_eq!(actual, expected, "Failed test case: '{}'", case_name);
    }

    /// Runs a table of `(input, expected index, case name)` rows.
    fn run_cases(cases: &[(&str, Option<usize>, &str)]) {
        for &(input, expected, case_name) in cases {
            run_test(input, expected, case_name);
        }
    }

    #[test]
    fn test_no_escapes() {
        run_cases(&[
            ("", None, "Empty string"),
            ("Hello, world!", None, "Simple ASCII"),
            ("This string is exactly 16 bytes", None, "16-byte ASCII"),
            (
                "This string is over 16 bytes long now",
                None,
                "Over 16-byte ASCII",
            ),
            // Multi-byte UTF-8 must never be mistaken for an escapable byte.
            ("Hello, éàçüö!", None, "Non-ASCII UTF-8"),
            ("Testing with emojis 😀❤️✅", None, "Emojis"),
        ]);
    }

    #[test]
    fn test_single_escapes() {
        run_cases(&[
            ("\"", Some(0), "Quote at start"),
            ("Hello \" world", Some(6), "Quote in middle"),
            ("Hello\\", Some(5), "Backslash at end"),
            ("\n", Some(0), "Control char (newline) at start"),
            ("Hello\tworld", Some(5), "Control char (tab) in middle"),
            (
                "Control char at end\u{08}",
                Some(19),
                "Control char (backspace) at end",
            ),
        ]);
    }

    #[test]
    fn test_finds_first_of_multiple() {
        run_cases(&[
            ("a\"b\\c\nd", Some(1), "Finds first quote"),
            ("ab\\c\"d\ne", Some(2), "Finds first backslash"),
            ("abc\nd\"e\\f", Some(3), "Finds first control char"),
            ("\"\n\\", Some(0), "Multiple escapes at start"),
        ]);
    }

    #[test]
    fn test_simd_chunk_boundaries() {
        // Place the escapable byte just before, at, and just after a 16-byte boundary.
        let before = format!("{}\"", "a".repeat(15));
        let at = format!("{}\n", "a".repeat(16));
        let after = format!("{}\t", "a".repeat(17));
        let deep = format!("{}\\\\", "a".repeat(40));

        run_test(&before, Some(15), "Escape at index 15");
        run_test(&at, Some(16), "Escape at index 16");
        run_test(&after, Some(17), "Escape at index 17");
        run_test(&deep, Some(40), "Escape deep in a long string");
    }

    #[test]
    fn test_remainder_logic() {
        // Inputs shorter than one 16-byte block.
        run_test("short\nstring", Some(5), "Short string with escape");
        run_test("no escapes", None, "Short string no escape");

        // One full 16-byte block followed by a one-byte tail.
        let one_byte_tail = format!("{}\"", "a".repeat(16));
        run_test(&one_byte_tail, Some(16), "Escape in 1-byte remainder");

        // One full 16-byte block followed by a fifteen-byte tail.
        let filler = "b".repeat(15);
        let long_tail = format!("{}{}\t", filler, filler);
        run_test(&long_tail, Some(30), "Escape at end of 15-byte remainder");
    }

    #[test]
    fn test_all_escapable_bytes_individually() {
        // Sixteen harmless bytes first, so the byte under test sits at index 16.
        let prefix = "0123456789abcdef";

        for byte_val in 0..=255u8 {
            let mut test_bytes = prefix.as_bytes().to_vec();
            test_bytes.push(byte_val);

            let result = EscapeTokens::find_escape_char(&test_bytes);
            let expected_to_escape = ESCAPE_DECISION_TABLE[byte_val as usize] == 1;

            if !expected_to_escape {
                assert_eq!(
                    result, None,
                    "Incorrectly found an escape for byte 0x{:02X}",
                    byte_val
                );
                continue;
            }

            assert_eq!(
                result,
                Some(16),
                "Failed to find required escape for byte 0x{:02X}",
                byte_val
            );
        }
    }
}