json_escape_simd/
lib.rs

1//! Optimized SIMD routines for escaping JSON strings.
2//!
3//! ## <div class="warning">Important</div>
4//!
5//! On aarch64 NEON hosts the available register width is **128** bits, which is narrower than the lookup table this implementation prefers. As a result the SIMD path may not outperform the generic fallback, which is reflected in the benchmark numbers below.
6//!
//! On some modern macOS devices that have more registers available, the SIMD path may outperform the generic fallback; see the [M3 Max benchmark](#apple-m3-max) below.
8//!
9//! ### Note
10//!
//! The `force_aarch64_neon` feature flag forces the NEON implementation to be used on aarch64. This is useful for benchmarking.
12//!
13//! ## Benchmarks
14//!
15//! Numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion reports are summarized to make it easier to spot relative performance. "vs fastest" shows how much slower each implementation is compared to the fastest entry in the table (1.00× means fastest).
16//!
17//! ### GitHub Actions x86_64 (`ubuntu-latest`)
18//!
19//! `AVX2` enabled.
20//!
21//! **RxJS payload (~10k iterations)**
22//!
23//! | Implementation        | Median time   | vs fastest |
24//! | --------------------- | ------------- | ---------- |
25//! | **`escape simd`**     | **345.06 µs** | **1.00×**  |
26//! | `escape v_jsonescape` | 576.25 µs     | 1.67×      |
27//! | `escape generic`      | 657.94 µs     | 1.91×      |
28//! | `serde_json`          | 766.72 µs     | 2.22×      |
29//! | `json-escape`         | 782.65 µs     | 2.27×      |
30//!
31//! **Fixtures payload (~300 iterations)**
32//!
33//! | Implementation        | Median time  | vs fastest |
34//! | --------------------- | ------------ | ---------- |
35//! | **`escape simd`**     | **12.84 ms** | **1.00×**  |
36//! | `escape v_jsonescape` | 19.66 ms     | 1.53×      |
37//! | `escape generic`      | 22.53 ms     | 1.75×      |
38//! | `serde_json`          | 24.65 ms     | 1.92×      |
39//! | `json-escape`         | 26.64 ms     | 2.07×      |
40//!
41//! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
42//!
//! NEON enabled.
44//!
45//! **RxJS payload (~10k iterations)**
46//!
47//! | Implementation        | Median time   | vs fastest |
48//! | --------------------- | ------------- | ---------- |
49//! | **`escape generic`**  | **546.89 µs** | **1.00×**  |
50//! | `escape simd`         | 589.29 µs     | 1.08×      |
51//! | `serde_json`          | 612.33 µs     | 1.12×      |
52//! | `json-escape`         | 624.66 µs     | 1.14×      |
53//! | `escape v_jsonescape` | 789.14 µs     | 1.44×      |
54//!
55//! **Fixtures payload (~300 iterations)**
56//!
57//! | Implementation        | Median time  | vs fastest |
58//! | --------------------- | ------------ | ---------- |
59//! | **`escape generic`**  | **17.81 ms** | **1.00×**  |
60//! | `serde_json`          | 19.77 ms     | 1.11×      |
61//! | `json-escape`         | 20.84 ms     | 1.17×      |
62//! | `escape simd`         | 21.04 ms     | 1.18×      |
63//! | `escape v_jsonescape` | 25.57 ms     | 1.44×      |
64//!
65//! ### GitHub Actions macOS (`macos-latest`)
66//!
67//! Apple M1 chip
68//!
69//! **RxJS payload (~10k iterations)**
70//!
71//! | Implementation        | Median time   | vs fastest |
72//! | --------------------- | ------------- | ---------- |
73//! | **`escape generic`**  | **759.07 µs** | **1.00×**  |
74//! | `escape simd`         | 764.98 µs     | 1.01×      |
75//! | `serde_json`          | 793.91 µs     | 1.05×      |
76//! | `json-escape`         | 868.21 µs     | 1.14×      |
77//! | `escape v_jsonescape` | 926.00 µs     | 1.22×      |
78//!
79//! **Fixtures payload (~300 iterations)**
80//!
81//! | Implementation        | Median time  | vs fastest |
82//! | --------------------- | ------------ | ---------- |
83//! | **`serde_json`**      | **26.41 ms** | **1.00×**  |
84//! | `escape generic`      | 26.43 ms     | 1.00×      |
85//! | `escape simd`         | 26.42 ms     | 1.00×      |
86//! | `json-escape`         | 28.94 ms     | 1.10×      |
87//! | `escape v_jsonescape` | 29.22 ms     | 1.11×      |
88//!
89//! ### Apple M3 Max
90//!
91//! **RxJS payload (~10k iterations)**
92//!
93//! | Implementation        | Median time   | vs fastest |
94//! | --------------------- | ------------- | ---------- |
95//! | **`escape simd`**     | **307.20 µs** | **1.00×**  |
96//! | `escape generic`      | 490.00 µs     | 1.60×      |
97//! | `serde_json`          | 570.35 µs     | 1.86×      |
98//! | `escape v_jsonescape` | 599.72 µs     | 1.95×      |
99//! | `json-escape`         | 644.73 µs     | 2.10×      |
100//!
101//! **Fixtures payload (~300 iterations)**
102//!
103//! | Implementation        | Median time  | vs fastest |
104//! | --------------------- | ------------ | ---------- |
105//! | **`escape generic`**  | **17.89 ms** | **1.00×**  |
106//! | **`escape simd`**     | **17.92 ms** | **1.00×**  |
107//! | `serde_json`          | 19.78 ms     | 1.11×      |
108//! | `escape v_jsonescape` | 21.09 ms     | 1.18×      |
109//! | `json-escape`         | 22.43 ms     | 1.25×      |
110
111#[cfg(target_arch = "aarch64")]
112mod aarch64;
113mod generic;
114#[cfg(target_arch = "x86_64")]
115mod x86;
116
117pub use generic::escape_generic;
118
// Escape-kind markers stored in `ESCAPE`. Each marker is the character that
// follows the backslash in the JSON escape sequence of the byte noted beside it.
const BB: u8 = b'b'; // byte 0x08
const TT: u8 = b't'; // byte 0x09
const NN: u8 = b'n'; // byte 0x0A
const FF: u8 = b'f'; // byte 0x0C
const RR: u8 = b'r'; // byte 0x0D
pub(crate) const QU: u8 = b'"'; // byte 0x22
pub(crate) const BS: u8 = b'\\'; // byte 0x5C
pub(crate) const UU: u8 = b'u'; // bytes 0x00..=0x1F that have no marker of their own
const __: u8 = 0; // byte is not escaped

// Escape lookup table indexed by byte value. An entry of b'x' at index i means
// byte i is written as "\x" in JSON; an entry of 0 means byte i is copied as is.
pub(crate) const ESCAPE: [u8; 256] = {
    // Start with "no escape" everywhere, then mark the bytes that need one.
    let mut table = [__; 256];

    // Every control character defaults to the `u` marker ...
    let mut byte = 0;
    while byte < 0x20 {
        table[byte] = UU;
        byte += 1;
    }

    // ... except the five that have a dedicated short escape.
    table[0x08] = BB;
    table[0x09] = TT;
    table[0x0A] = NN;
    table[0x0C] = FF;
    table[0x0D] = RR;

    // The two printable characters that must always be escaped.
    table[0x22] = QU;
    table[0x5C] = BS;

    table
};
150
// Lowercase hex digit pairs for every byte value, computed at compile time:
// `HEX_BYTES[b]` is `(high nibble digit, low nibble digit)` as ASCII bytes.
// Used to speed up escaping of control characters.
pub(crate) const HEX_BYTES: [(u8, u8); 256] = {
    const DIGITS: [u8; 16] = *b"0123456789abcdef";

    let mut table = [(0u8, 0u8); 256];
    let mut value = 0usize;
    while value < 256 {
        table[value] = (DIGITS[value >> 4], DIGITS[value & 0x0F]);
        value += 1;
    }
    table
};
174
175/// Main entry point for JSON string escaping with SIMD acceleration
176/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
177pub fn escape<S: AsRef<str>>(input: S) -> String {
178    #[cfg(target_arch = "x86_64")]
179    {
180        // Runtime CPU feature detection for x86_64
181        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
182            unsafe { return x86::escape_avx512(input) }
183        } else if is_x86_feature_detected!("avx2") {
184            unsafe { return x86::escape_avx2(input) }
185        } else if is_x86_feature_detected!("sse2") {
186            unsafe { return x86::escape_sse2(input) }
187        } else {
188            return escape_generic(input);
189        }
190    }
191
192    #[cfg(target_arch = "aarch64")]
193    {
194        #[cfg(feature = "force_aarch64_neon")]
195        {
196            return aarch64::escape_neon(input);
197        }
198        #[cfg(not(feature = "force_aarch64_neon"))]
199        {
200            // on Apple M2 and later, the `bf16` feature is available
201            // it means they have more registers and can significantly benefit from the SIMD path
202            // TODO: add support for sve2 chips with wider registers
203            // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
204            if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
205                return aarch64::escape_neon(input);
206            } else {
207                return escape_generic(input);
208            }
209        }
210    }
211
212    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
213    escape_generic(input)
214}
215
/// Plain ASCII without any character that needs escaping must match `serde_json`.
#[test]
fn test_escape_ascii_json_string() {
    let fixture = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
    let expected = serde_json::to_string(fixture).unwrap();
    let actual = escape(fixture);
    assert_eq!(actual, expected);
}
221
/// Every control character, every dedicated escape, quote, backslash and a mix of
/// multi-byte UTF-8 must be escaped exactly like `serde_json` does.
#[test]
fn test_escape_json_string() {
    // All control characters 0x00..=0x1F.
    let mut fixture: String = (0u8..=0x1F).map(char::from).collect();
    // The characters with dedicated escapes, plus quote and backslash.
    fixture.extend(['\t', '\x08', '\x09', '\x0A', '\x0C', '\x0D', '\x22', '\x5C']);
    fixture.push_str("normal string");
    fixture.push('😊');
    fixture.push_str("中文 English 🚀 \n❓ 𝄞");

    // The original called `escape` once more beforehand and discarded the result;
    // that redundant call has been removed.
    assert_eq!(
        escape(fixture.as_str()),
        serde_json::to_string(fixture.as_str()).unwrap(),
        "fixture: {:?}",
        fixture
    );
}
247
248// Test cases for various string sizes to cover different SIMD paths
249
/// An empty input yields just the pair of surrounding quotes.
#[test]
fn test_empty_string() {
    let escaped = escape("");
    assert_eq!(escaped, r#""""#);
}
254
/// Inputs shorter than 16 bytes (the SSE register size).
#[test]
fn test_very_small_strings() {
    // (input, expected JSON output)
    let cases = [
        ("a", r#""a""#),
        ("ab", r#""ab""#),
        ("hello", r#""hello""#),
        ("hello\n", r#""hello\n""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
        ("\t", r#""\t""#),
        ("\r\n", r#""\r\n""#),
    ];
    for &(input, expected) in &cases {
        assert_eq!(escape(input), expected, "input: {:?}", input);
    }
}
267
/// Inputs of exactly 16 bytes, the SSE register boundary.
#[test]
fn test_small_strings_16_bytes() {
    // Exactly 16 bytes, nothing to escape.
    let s16 = "0123456789abcdef";
    assert_eq!(s16.len(), 16);
    assert_eq!(escape(s16), serde_json::to_string(s16).unwrap());

    // Exactly 16 bytes, one of which needs escaping. The previous fixture was
    // only 15 bytes long, so the escape case never sat on the 16-byte boundary.
    let s16_esc = "01234567\t9abcdef";
    assert_eq!(s16_esc.len(), 16); // `\t` is a single byte in the input
    assert_eq!(escape(s16_esc), serde_json::to_string(s16_esc).unwrap());
}
280
/// Inputs of exactly 32 bytes, the AVX2 register boundary.
#[test]
fn test_medium_strings_32_bytes() {
    // Exactly 32 bytes, nothing to escape.
    let s32 = "0123456789abcdef0123456789abcdef";
    assert_eq!(s32.len(), 32);
    assert_eq!(escape(s32), serde_json::to_string(s32).unwrap());

    // Exactly 32 bytes with a quote in the middle. The previous fixture was
    // 31 bytes long and its length was never asserted.
    let s32_esc = "0123456789abcde\"0123456789abcdef";
    assert_eq!(s32_esc.len(), 32); // `\"` is a single byte in the input
    assert_eq!(escape(s32_esc), serde_json::to_string(s32_esc).unwrap());
}
292
/// Inputs of exactly 128 bytes, the main loop size.
#[test]
fn test_large_strings_128_bytes() {
    // Exactly 128 bytes, nothing to escape.
    let s128 = "0123456789abcdef".repeat(8);
    assert_eq!(s128.len(), 128);
    assert_eq!(escape(&s128), serde_json::to_string(&s128).unwrap());

    // Exactly 128 bytes with an escape closing every 16-byte chunk, alternating
    // newline and quote. The previous chunks were 15 bytes each, which made the
    // fixture 120 bytes instead of 128.
    let s128_esc: String = (0..8)
        .map(|i| {
            if i % 2 == 0 {
                "0123456789abcde\n"
            } else {
                "0123456789abcde\""
            }
        })
        .collect();
    assert_eq!(s128_esc.len(), 128);
    assert_eq!(escape(&s128_esc), serde_json::to_string(&s128_esc).unwrap());
}
311
/// Escapes the same payload starting at 32 different offsets inside a buffer.
#[test]
fn test_unaligned_data() {
    const PAYLOAD: &str = "test\nstring\"with\\escapes";

    for offset in 0..32 {
        // Prefix the payload with `offset` spaces, then slice them off again so the
        // input starts `offset` bytes into the buffer.
        let mut buffer = " ".repeat(offset);
        buffer.push_str(PAYLOAD);
        let unaligned = &buffer[offset..];

        let expected = serde_json::to_string(unaligned).unwrap();
        let result = escape(unaligned);
        assert_eq!(result, expected, "Failed at offset {}", offset);
    }
}
323
/// A long input whose only escapes are the first and the last byte.
#[test]
fn test_sparse_escapes() {
    let mut sparse = String::with_capacity(502);
    sparse.push('"');
    sparse.extend(std::iter::repeat('a').take(500));
    sparse.push('\\');

    let expected = serde_json::to_string(&sparse).unwrap();
    assert_eq!(escape(&sparse), expected);
}
333
/// Inputs in which every byte needs escaping.
#[test]
fn test_dense_escapes() {
    // Alternating quotes and backslashes.
    let quotes_and_backslashes = "\"\\\"\\\"\\\"\\".repeat(50);
    assert_eq!(
        escape(&quotes_and_backslashes),
        serde_json::to_string(&quotes_and_backslashes).unwrap()
    );

    // The full control-character range 0x00..0x20, ten times over.
    let ctrl: String = (0..10)
        .flat_map(|_| (0u8..32).map(char::from))
        .collect();
    assert_eq!(escape(&ctrl), serde_json::to_string(&ctrl).unwrap());
}
349
/// Lengths just below and above 256 bytes, with and without a trailing escape.
#[test]
fn test_boundary_conditions() {
    for size in 250..260 {
        // `size` bytes, nothing to escape.
        let plain = "a".repeat(size);
        assert_eq!(escape(&plain), serde_json::to_string(&plain).unwrap());

        // Same length, but the final byte is a quote.
        let quoted_tail = ["a".repeat(size - 1).as_str(), "\""].concat();
        assert_eq!(
            escape(&quoted_tail),
            serde_json::to_string(&quoted_tail).unwrap()
        );
    }
}
363
/// Each kind of escape on its own, then every control character against `serde_json`.
#[test]
fn test_all_escape_types() {
    // (single-character input, expected JSON output)
    let cases = [
        ("\x00", r#""\u0000""#),
        ("\x08", r#""\b""#),
        ("\x09", r#""\t""#),
        ("\x0A", r#""\n""#),
        ("\x0C", r#""\f""#),
        ("\x0D", r#""\r""#),
        ("\x1F", r#""\u001f""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
    ];
    for &(input, expected) in &cases {
        assert_eq!(escape(input), expected, "input: {:?}", input);
    }

    // Every control character, one per string.
    for byte in 0u8..32 {
        let single = char::from(byte).to_string();
        let expected = serde_json::to_string(&single).unwrap();
        let result = escape(&single);
        assert_eq!(result, expected, "Failed for byte 0x{:02x}", byte);
    }
}
385
/// ASCII, characters that need escaping and multi-byte UTF-8 in a single input.
#[test]
fn test_mixed_content() {
    // Raw string: the line breaks, the tab after "Tab:" and the backslashes on the
    // last line are all literal input bytes.
    let sample = r#"Hello "World"!
    Tab:	Here
    Emoji: 😀 Chinese: 中文
    Math: ∑∫∂ Music: 𝄞
    Escape: \" \\ \n \r \t"#;
    let expected = serde_json::to_string(sample).unwrap();
    assert_eq!(escape(sample), expected);
}
396
/// Periodic inputs that might benefit from or confuse SIMD operations.
#[test]
fn test_repeated_patterns() {
    // No escapes, alternating quotes, and escapes only.
    for &unit in &["abcd", "a\"b\"", "\t\n"] {
        let repeated = unit.repeat(100);
        assert_eq!(
            escape(&repeated),
            serde_json::to_string(&repeated).unwrap()
        );
    }
}
409
/// Escapes every TypeScript source under `node_modules/rxjs/src` and compares with `serde_json`.
///
/// Panics if the glob matches nothing or a file cannot be read.
#[test]
fn test_rxjs() {
    let sources: Vec<String> = glob::glob("node_modules/rxjs/src/**/*.ts")
        .unwrap()
        .map(|entry| std::fs::read_to_string(entry.unwrap()).unwrap())
        .collect();
    // Guard against silently passing when the sources are not present.
    assert!(!sources.is_empty());

    for source in &sources {
        assert_eq!(escape(source), serde_json::to_string(source).unwrap());
    }
}
422
/// Escapes every `.ts`, `.tsx`, `.js`, `.mjs` and `.cjs` file under `fixtures/` and compares
/// with `serde_json`.
///
/// Panics if no file matches or a matched file cannot be read.
#[test]
fn test_sources() {
    // Visited in this order, matching the original chain of globs.
    let patterns = [
        "fixtures/**/*.ts",
        "fixtures/**/*.tsx",
        "fixtures/**/*.js",
        "fixtures/**/*.mjs",
        "fixtures/**/*.cjs",
    ];

    let mut sources = Vec::new();
    for &pattern in &patterns {
        for entry in glob::glob(pattern).unwrap() {
            let path = entry.unwrap();
            // Skip anything that matches the pattern but is not a regular file.
            if std::fs::metadata(&path).unwrap().is_file() {
                sources.push(std::fs::read_to_string(&path).unwrap());
            }
        }
    }
    // Guard against silently passing when the fixtures are not present.
    assert!(!sources.is_empty());

    for source in &sources {
        assert_eq!(escape(source), serde_json::to_string(source).unwrap());
    }
}