json_escape_simd/
generic.rs

1#[inline]
2pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
3    let s = s.as_ref();
4    let bytes = s.as_bytes();
5    // Estimate capacity - most strings don't need much escaping
6    // Add some padding for potential escapes
7    let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
8    let mut result = Vec::with_capacity(estimated_capacity);
9    result.push(b'"');
10    escape_inner(bytes, &mut result);
11    result.push(b'"');
12    // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
13    unsafe { String::from_utf8_unchecked(result) }
14}
15
16#[inline]
17// Slightly modified version of
18// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
19// Borrowed from:
20// <https://github.com/oxc-project/oxc-sourcemap/blob/e533e6ca4d08c538d8d4df74eacd29437851591f/src/encode.rs#L331>
21pub(crate) fn escape_inner(bytes: &[u8], result: &mut Vec<u8>) {
22    let mut start = 0;
23    let mut i = 0;
24
25    while i < bytes.len() {
26        let b = bytes[i];
27
28        // Use lookup table to check if escaping is needed
29        let escape_byte = ESCAPE[b as usize];
30
31        if escape_byte == 0 {
32            // No escape needed, continue scanning
33            i += 1;
34            continue;
35        }
36
37        // Copy any unescaped bytes before this position
38        if start < i {
39            result.extend_from_slice(&bytes[start..i]);
40        }
41
42        // Handle the escape
43        result.push(b'\\');
44        if escape_byte == b'u' {
45            // Unicode escape for control characters
46            result.extend_from_slice(b"u00");
47            let hex_digits = &HEX_BYTES[b as usize];
48            result.push(hex_digits.0);
49            result.push(hex_digits.1);
50        } else {
51            // Simple escape
52            result.push(escape_byte);
53        }
54
55        i += 1;
56        start = i;
57    }
58
59    // Copy any remaining unescaped bytes
60    if start < bytes.len() {
61        result.extend_from_slice(&bytes[start..]);
62    }
63}
64
// Escape codes stored in `ESCAPE`: each is the character that follows the
// backslash in the emitted sequence.
const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
pub(crate) const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0;

// Lookup table of escape sequences. A value of b'x' at index i means that byte
// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
//
// The table is computed at compile time: everything starts out unescaped, the
// whole control range 0x00..=0x1F defaults to the `\u00XX` form, and the bytes
// that have a dedicated short escape are then overridden individually.
pub(crate) static ESCAPE: [u8; 256] = {
    let mut table = [__; 256];

    let mut ctrl = 0;
    while ctrl < 0x20 {
        table[ctrl] = UU;
        ctrl += 1;
    }

    table[0x08] = BB;
    table[0x09] = TT;
    table[0x0A] = NN;
    table[0x0C] = FF;
    table[0x0D] = RR;
    table[0x22] = QU;
    table[0x5C] = BS;

    table
};
96
// Two ASCII hex digits of a byte value: field 0 is the high nibble, field 1
// the low nibble.
pub(crate) struct HexPair(pub(crate) u8, pub(crate) u8);

// Pre-computed hex digit pairs for control characters: entry `i` holds the
// lowercase two-digit hex spelling of `i` for `i` in 0x00..=0x1F. Built at
// compile time from the index instead of being spelled out entry by entry.
pub(crate) static HEX_BYTES: [HexPair; 32] = {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    // Placeholder used to fill the array; `HexPair` is not `Copy`, so the
    // repeat expression needs a constant operand.
    const BLANK: HexPair = HexPair(b'0', b'0');

    let mut pairs = [BLANK; 32];
    let mut value = 0;
    while value < 32 {
        pairs[value] = HexPair(DIGITS[value >> 4], DIGITS[value & 0x0F]);
        value += 1;
    }
    pairs
};