json_escape_simd/
generic.rs

1#[inline]
2// Slightly modified version of
3// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
4// Borrowed from:
5// <https://github.com/oxc-project/oxc-sourcemap/blob/e533e6ca4d08c538d8d4df74eacd29437851591f/src/encode.rs#L331>
6pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
7    let s = s.as_ref();
8    let bytes = s.as_bytes();
9
10    // Estimate capacity - most strings don't need much escaping
11    // Add some padding for potential escapes
12    let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
13    let mut result = Vec::with_capacity(estimated_capacity);
14
15    result.push(b'"');
16
17    let mut start = 0;
18    let mut i = 0;
19
20    while i < bytes.len() {
21        let b = bytes[i];
22
23        // Use lookup table to check if escaping is needed
24        let escape_byte = ESCAPE[b as usize];
25
26        if escape_byte == 0 {
27            // No escape needed, continue scanning
28            i += 1;
29            continue;
30        }
31
32        // Copy any unescaped bytes before this position
33        if start < i {
34            result.extend_from_slice(&bytes[start..i]);
35        }
36
37        // Handle the escape
38        result.push(b'\\');
39        if escape_byte == b'u' {
40            // Unicode escape for control characters
41            result.extend_from_slice(b"u00");
42            let hex_digits = &HEX_BYTES[b as usize];
43            result.push(hex_digits.0);
44            result.push(hex_digits.1);
45        } else {
46            // Simple escape
47            result.push(escape_byte);
48        }
49
50        i += 1;
51        start = i;
52    }
53
54    // Copy any remaining unescaped bytes
55    if start < bytes.len() {
56        result.extend_from_slice(&bytes[start..]);
57    }
58
59    result.push(b'"');
60
61    // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
62    unsafe { String::from_utf8_unchecked(result) }
63}
64
// Escape codes stored in `ESCAPE`. Each one is the character written after the
// backslash when the byte noted on the right is escaped.
const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
pub(crate) const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0;

// Lookup table of escape sequences. A value of b'x' at index i means that byte
// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
//
// The table is assembled at compile time: everything starts out unescaped, the
// control range 0x00..=0x1F is marked for the \u00XX form, and then the bytes
// that have a dedicated short escape are overridden individually.
pub(crate) static ESCAPE: [u8; 256] = {
    let mut table = [__; 256];

    // All control characters default to the generic \u00XX escape.
    let mut ctrl = 0;
    while ctrl < 0x20 {
        table[ctrl] = UU;
        ctrl += 1;
    }

    // Control characters that have a short escape of their own.
    table[0x08] = BB;
    table[0x09] = TT;
    table[0x0A] = NN;
    table[0x0C] = FF;
    table[0x0D] = RR;

    // The two printable characters that must be escaped inside a JSON string.
    table[0x22] = QU;
    table[0x5C] = BS;

    table
};
96
// Two ASCII hex digits, stored as (high nibble, low nibble).
pub(crate) struct HexPair(u8, u8);

// Pre-computed hex digit pairs for control characters: `HEX_BYTES[b]` holds the
// two lowercase hex digits of `b` for every `b` in 0x00..=0x1F.
//
// Built at compile time from a digit alphabet rather than spelled out entry by
// entry, so the entries cannot drift out of step with their indices.
pub(crate) static HEX_BYTES: [HexPair; 32] = {
    const DIGITS: [u8; 16] = *b"0123456789abcdef";
    // Placeholder used to initialise the array; every slot is overwritten below.
    const BLANK: HexPair = HexPair(b'0', b'0');

    let mut pairs = [BLANK; 32];
    let mut byte = 0;
    while byte < 32 {
        pairs[byte] = HexPair(DIGITS[byte >> 4], DIGITS[byte & 0x0F]);
        byte += 1;
    }
    pairs
};