// json_escape_simd/generic.rs

1#[inline]
2pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
3    let s = s.as_ref();
4    let bytes = s.as_bytes();
5    // Estimate capacity - most strings don't need much escaping
6    // Add some padding for potential escapes
7    let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
8    let mut result = Vec::with_capacity(estimated_capacity);
9    result.push(b'"');
10    escape_inner(bytes, &mut result);
11    result.push(b'"');
12    // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
13    unsafe { String::from_utf8_unchecked(result) }
14}
15
16#[inline]
17pub fn escape_into_generic<S: AsRef<str>>(s: S, output: &mut Vec<u8>) {
18    let s = s.as_ref();
19    let bytes = s.as_bytes();
20    output.push(b'"');
21    escape_inner(bytes, output);
22    output.push(b'"');
23}
24
25#[inline]
26// Slightly modified version of
27// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
28// Borrowed from:
29// <https://github.com/oxc-project/oxc-sourcemap/blob/e533e6ca4d08c538d8d4df74eacd29437851591f/src/encode.rs#L331>
30pub(crate) fn escape_inner(bytes: &[u8], result: &mut Vec<u8>) {
31    let mut start = 0;
32    let mut i = 0;
33
34    while i < bytes.len() {
35        let b = bytes[i];
36
37        // Use lookup table to check if escaping is needed
38        let escape_byte = ESCAPE[b as usize];
39
40        if escape_byte == 0 {
41            // No escape needed, continue scanning
42            i += 1;
43            continue;
44        }
45
46        // Copy any unescaped bytes before this position
47        if start < i {
48            result.extend_from_slice(&bytes[start..i]);
49        }
50
51        // Handle the escape
52        result.push(b'\\');
53        if escape_byte == b'u' {
54            // Unicode escape for control characters
55            result.extend_from_slice(b"u00");
56            let hex_digits = &HEX_BYTES[b as usize];
57            result.push(hex_digits.0);
58            result.push(hex_digits.1);
59        } else {
60            // Simple escape
61            result.push(escape_byte);
62        }
63
64        i += 1;
65        start = i;
66    }
67
68    // Copy any remaining unescaped bytes
69    if start < bytes.len() {
70        result.extend_from_slice(&bytes[start..]);
71    }
72}
73
// Escape codes stored in `ESCAPE`. Each value is the character that follows the
// backslash in the emitted escape sequence; the trailing comment gives the input byte.
const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
pub(crate) const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0;

// Lookup table of escape sequences, indexed by input byte. A value of b'x' at index i
// means that byte i is escaped as "\x" in JSON (with UU standing for the long "\u00XX"
// form). A value of 0 means that byte i is not escaped.
//
// The table is assembled at compile time: everything starts out unescaped, the control
// range 0x00..=0x1F defaults to UU, and the bytes that have a short escape are then
// overridden one by one.
pub(crate) static ESCAPE: [u8; 256] = {
    let mut table = [__; 256];

    let mut control = 0;
    while control < 0x20 {
        table[control] = UU;
        control += 1;
    }

    table[0x08] = BB;
    table[0x09] = TT;
    table[0x0A] = NN;
    table[0x0C] = FF;
    table[0x0D] = RR;
    table[0x22] = QU;
    table[0x5C] = BS;

    table
};
105
// Two ASCII hex digits: field 0 is the high nibble, field 1 the low nibble.
pub(crate) struct HexPair(pub(crate) u8, pub(crate) u8);

// Pre-computed lowercase hex digit pairs for the control characters 0x00..=0x1F.
// Entry i holds the two digits of i, e.g. entry 0x1f is ('1', 'f').
//
// The entries are generated at compile time from the hex alphabet below.
pub(crate) static HEX_BYTES: [HexPair; 32] = {
    const LOWER_HEX: [u8; 16] = *b"0123456789abcdef";
    const BLANK: HexPair = HexPair(b'0', b'0');

    let mut pairs = [BLANK; 32];
    let mut value = 0;
    while value < 32 {
        pairs[value].0 = LOWER_HEX[value >> 4];
        pairs[value].1 = LOWER_HEX[value & 0x0F];
        value += 1;
    }
    pairs
};