json_escape_simd/
lib.rs

1//! Optimized SIMD routines for escaping JSON strings.
2//!
3//! ## <div class="warning">Important</div>
4//!
5//! On aarch64 NEON hosts the available register width is **128** bits, which is narrower than the lookup table this implementation prefers. As a result the SIMD path may not outperform the generic fallback, which is reflected in the benchmark numbers below.
6//!
//! On some modern macOS devices that have more registers available, the SIMD path may outperform the generic fallback; see the [M3 Max benchmark](#apple-m3-max) below.
8//!
9//! ### Note
10//!
//! The `force_aarch64_neon` feature flag forces the NEON implementation to be used on aarch64, bypassing the runtime check. This is useful for benchmarking.
12//!
13//! ## Benchmarks
14//!
//! Unless noted otherwise, the numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion reports are summarized to make it easier to spot relative performance. "vs fastest" shows how much slower each implementation is compared to the fastest entry in the table (1.00× means fastest).
16//!
17//! ### GitHub Actions x86_64 (`ubuntu-latest`)
18//!
19//! `AVX2` enabled.
20//!
21//! **RxJS payload (~10k iterations)**
22//!
23//! | Implementation        | Median time   | vs fastest |
24//! | --------------------- | ------------- | ---------- |
25//! | **`escape simd`**     | **341.18 µs** | **1.00×**  |
26//! | `escape v_jsonescape` | 555.47 µs     | 1.63×      |
27//! | `escape generic`      | 656.85 µs     | 1.93×      |
28//! | `serde_json`          | 744.75 µs     | 2.18×      |
29//! | `json-escape`         | 777.15 µs     | 2.28×      |
30//!
31//! **Fixtures payload (~300 iterations)**
32//!
33//! | Implementation        | Median time  | vs fastest |
34//! | --------------------- | ------------ | ---------- |
35//! | **`escape simd`**     | **12.67 ms** | **1.00×**  |
36//! | `escape v_jsonescape` | 20.58 ms     | 1.62×      |
37//! | `escape generic`      | 22.57 ms     | 1.78×      |
38//! | `serde_json`          | 24.52 ms     | 1.94×      |
39//! | `json-escape`         | 26.97 ms     | 2.13×      |
40//!
41//! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
42//!
43//! Neon enabled.
44//!
45//! **RxJS payload (~10k iterations)**
46//!
47//! | Implementation        | Median time   | vs fastest |
48//! | --------------------- | ------------- | ---------- |
49//! | **`escape generic`**  | **546.89 µs** | **1.00×**  |
50//! | `escape simd`         | 589.29 µs     | 1.08×      |
51//! | `serde_json`          | 612.33 µs     | 1.12×      |
52//! | `json-escape`         | 624.66 µs     | 1.14×      |
53//! | `escape v_jsonescape` | 789.14 µs     | 1.44×      |
54//!
55//! **Fixtures payload (~300 iterations)**
56//!
57//! | Implementation        | Median time  | vs fastest |
58//! | --------------------- | ------------ | ---------- |
59//! | **`escape generic`**  | **17.81 ms** | **1.00×**  |
60//! | `serde_json`          | 19.77 ms     | 1.11×      |
61//! | `json-escape`         | 20.84 ms     | 1.17×      |
62//! | `escape simd`         | 21.04 ms     | 1.18×      |
63//! | `escape v_jsonescape` | 25.57 ms     | 1.44×      |
64//!
65//! ### GitHub Actions macOS (`macos-latest`)
66//!
67//! Apple M1 chip
68//!
69//! **RxJS payload (~10k iterations)**
70//!
71//! | Implementation        | Median time   | vs fastest |
72//! | --------------------- | ------------- | ---------- |
73//! | **`escape generic`**  | **759.07 µs** | **1.00×**  |
74//! | `escape simd`         | 764.98 µs     | 1.01×      |
75//! | `serde_json`          | 793.91 µs     | 1.05×      |
76//! | `json-escape`         | 868.21 µs     | 1.14×      |
77//! | `escape v_jsonescape` | 926.00 µs     | 1.22×      |
78//!
79//! **Fixtures payload (~300 iterations)**
80//!
81//! | Implementation        | Median time  | vs fastest |
82//! | --------------------- | ------------ | ---------- |
83//! | **`serde_json`**      | **26.41 ms** | **1.00×**  |
84//! | `escape generic`      | 26.43 ms     | 1.00×      |
85//! | `escape simd`         | 26.42 ms     | 1.00×      |
86//! | `json-escape`         | 28.94 ms     | 1.10×      |
87//! | `escape v_jsonescape` | 29.22 ms     | 1.11×      |
88//!
89//! ### Apple M3 Max
90//!
91//! **RxJS payload (~10k iterations)**
92//!
93//! | Implementation        | Median time   | vs fastest |
94//! | --------------------- | ------------- | ---------- |
95//! | **`escape simd`**     | **307.20 µs** | **1.00×**  |
96//! | `escape generic`      | 490.00 µs     | 1.60×      |
97//! | `serde_json`          | 570.35 µs     | 1.86×      |
98//! | `escape v_jsonescape` | 599.72 µs     | 1.95×      |
99//! | `json-escape`         | 644.73 µs     | 2.10×      |
100//!
101//! **Fixtures payload (~300 iterations)**
102//!
103//! | Implementation        | Median time  | vs fastest |
104//! | --------------------- | ------------ | ---------- |
105//! | **`escape generic`**  | **17.89 ms** | **1.00×**  |
106//! | **`escape simd`**     | **17.92 ms** | **1.00×**  |
107//! | `serde_json`          | 19.78 ms     | 1.11×      |
108//! | `escape v_jsonescape` | 21.09 ms     | 1.18×      |
109//! | `json-escape`         | 22.43 ms     | 1.25×      |
110
111#[cfg(target_arch = "aarch64")]
112mod aarch64;
113mod generic;
114#[cfg(target_arch = "x86_64")]
115mod x86;
116
117pub use generic::{escape_generic, escape_into_generic};
118
119/// Main entry point for JSON string escaping with SIMD acceleration
120/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
121pub fn escape<S: AsRef<str>>(input: S) -> String {
122    use generic::escape_inner;
123
124    let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
125    result.push(b'"');
126    let s = input.as_ref();
127    let bytes = s.as_bytes();
128    #[cfg(target_arch = "x86_64")]
129    {
130        let len = bytes.len();
131        // Runtime CPU feature detection for x86_64
132        if is_x86_feature_detected!("avx512f")
133            && is_x86_feature_detected!("avx512bw")
134            && len >= x86::LOOP_SIZE_AVX512
135        {
136            unsafe { x86::escape_avx512(bytes, &mut result) }
137        } else if is_x86_feature_detected!("avx2") && len >= x86::LOOP_SIZE_AVX2 {
138            unsafe { x86::escape_avx2(bytes, &mut result) }
139        } else if is_x86_feature_detected!("sse2")
140            && /* if len < 128, no need to use simd */
141            len >= x86::LOOP_SIZE_AVX2
142        {
143            unsafe { x86::escape_sse2(bytes, &mut result) }
144        } else {
145            escape_inner(bytes, &mut result);
146        }
147    }
148
149    #[cfg(target_arch = "aarch64")]
150    {
151        #[cfg(feature = "force_aarch64_neon")]
152        {
153            aarch64::escape_neon(bytes, &mut result);
154        }
155        #[cfg(not(feature = "force_aarch64_neon"))]
156        {
157            // on Apple M2 and later, the `bf16` feature is available
158            // it means they have more registers and can significantly benefit from the SIMD path
159            // TODO: add support for sve2 chips with wider registers
160            // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
161            if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
162                aarch64::escape_neon(bytes, &mut result);
163            } else {
164                escape_inner(bytes, &mut result);
165            }
166        }
167    }
168
169    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
170    {
171        escape_inner(bytes, &mut result);
172    }
173    result.push(b'"');
174    // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
175    unsafe { String::from_utf8_unchecked(result) }
176}
177
178/// Main entry point for JSON string escaping with SIMD acceleration
179/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
180pub fn escape_into<S: AsRef<str>>(input: S, output: &mut Vec<u8>) {
181    use generic::escape_inner;
182
183    output.push(b'"');
184    let s = input.as_ref();
185    let bytes = s.as_bytes();
186    #[cfg(target_arch = "x86_64")]
187    {
188        let len = bytes.len();
189        // Runtime CPU feature detection for x86_64
190        if is_x86_feature_detected!("avx512f")
191            && is_x86_feature_detected!("avx512bw")
192            && len >= x86::LOOP_SIZE_AVX512
193        {
194            unsafe { x86::escape_avx512(bytes, output) }
195        } else if is_x86_feature_detected!("avx2") && len >= x86::LOOP_SIZE_AVX2 {
196            unsafe { x86::escape_avx2(bytes, output) }
197        } else if is_x86_feature_detected!("sse2")
198          && /* if len < 128, no need to use simd */
199          len >= x86::LOOP_SIZE_AVX2
200        {
201            unsafe { x86::escape_sse2(bytes, output) }
202        } else {
203            escape_inner(bytes, output);
204        }
205    }
206
207    #[cfg(target_arch = "aarch64")]
208    {
209        #[cfg(feature = "force_aarch64_neon")]
210        {
211            return aarch64::escape_neon(bytes, output);
212        }
213        #[cfg(not(feature = "force_aarch64_neon"))]
214        {
215            // on Apple M2 and later, the `bf16` feature is available
216            // it means they have more registers and can significantly benefit from the SIMD path
217            // TODO: add support for sve2 chips with wider registers
218            // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
219            if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
220                aarch64::escape_neon(bytes, output);
221            } else {
222                escape_inner(bytes, output);
223            }
224        }
225    }
226
227    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
228    {
229        escape_into_generic(input, output);
230    }
231    output.push(b'"');
232}
233
/// Plain ASCII input must produce the same output as `serde_json`.
#[test]
fn test_escape_ascii_json_string() {
    let plain = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
    let actual = escape(plain);
    let expected = serde_json::to_string(plain).unwrap();
    assert_eq!(actual, expected);
}
239
/// Every control character, the short-form escapes, quote, backslash and multi-byte UTF-8
/// in a single input must match `serde_json`.
#[test]
fn test_escape_json_string() {
    // All C0 control characters, U+0000 through U+001F.
    let mut fixture: String = (0u8..=0x1F).map(char::from).collect();
    // Characters with dedicated escapes, then quote and backslash.
    fixture.push_str("\t\x08\x09\x0A\x0C\x0D\x22\x5C");
    fixture.push_str("normal string");
    fixture.push('😊');
    fixture.push_str("中文 English 🚀 \n❓ 𝄞");
    assert_eq!(
        escape(fixture.as_str()),
        serde_json::to_string(fixture.as_str()).unwrap(),
        "fixture: {:?}",
        fixture
    );
}
265
266// Test cases for various string sizes to cover different SIMD paths
267
/// An empty input yields just the two surrounding quotes.
#[test]
fn test_empty_string() {
    let escaped = escape("");
    assert_eq!(escaped, r#""""#);
}
272
/// Inputs shorter than 16 bytes (the SSE register size), with and without escapes.
#[test]
fn test_very_small_strings() {
    // (input, expected escaped output)
    let cases: [(&str, &str); 8] = [
        ("a", r#""a""#),
        ("ab", r#""ab""#),
        ("hello", r#""hello""#),
        ("hello\n", r#""hello\n""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
        ("\t", r#""\t""#),
        ("\r\n", r#""\r\n""#),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(escape(input), expected);
    }
}
285
/// Inputs of exactly 16 bytes, the SSE register boundary, with and without an escape.
#[test]
fn test_small_strings_16_bytes() {
    // Exactly 16 bytes - SSE register boundary
    let s16 = "0123456789abcdef";
    assert_eq!(s16.len(), 16);
    assert_eq!(escape(s16), serde_json::to_string(s16).unwrap());

    // Exactly 16 bytes including one escaped character (`\t` is a single input byte).
    let s16_esc = "01234567\t9abcdef";
    assert_eq!(s16_esc.len(), 16);
    assert_eq!(escape(s16_esc), serde_json::to_string(s16_esc).unwrap());
}
298
/// Inputs of exactly 32 bytes, the AVX2 register boundary, with a quote at every position.
#[test]
fn test_medium_strings_32_bytes() {
    // Exactly 32 bytes - AVX2 register boundary
    let s32 = "0123456789abcdef0123456789abcdef";
    assert_eq!(s32.len(), 32);
    assert_eq!(escape(s32), serde_json::to_string(s32).unwrap());

    // Exactly 32 bytes with one escaped character in the middle.
    let s32_esc = "0123456789abcde\"0123456789abcdef";
    assert_eq!(s32_esc.len(), 32);
    assert_eq!(escape(s32_esc), serde_json::to_string(s32_esc).unwrap());

    // Move the escaped character through every position of the 32-byte input.
    for pos in 0..s32.len() {
        let mut shifted = String::with_capacity(s32.len());
        shifted.push_str(&s32[..pos]);
        shifted.push('"');
        shifted.push_str(&s32[pos + 1..]);
        assert_eq!(shifted.len(), 32);
        assert_eq!(
            escape(&shifted),
            serde_json::to_string(&shifted).unwrap(),
            "Failed with quote at position {}",
            pos
        );
    }
}
310
/// Inputs of exactly 128 bytes, the main loop size noted in the comments, with and without
/// escapes.
#[test]
fn test_large_strings_128_bytes() {
    // Exactly 128 bytes - main loop size
    let s128 = "0123456789abcdef".repeat(8);
    assert_eq!(s128.len(), 128);
    assert_eq!(escape(&s128), serde_json::to_string(&s128).unwrap());

    // Exactly 128 bytes with escapes spread throughout: eight 16-byte chunks, each ending in
    // an escaped character, alternating newline and quote.
    let mut s128_esc = String::with_capacity(128);
    for i in 0..8 {
        if i % 2 == 0 {
            s128_esc.push_str("0123456789abcde\n");
        } else {
            s128_esc.push_str("0123456789abcde\"");
        }
    }
    assert_eq!(s128_esc.len(), 128);
    assert_eq!(escape(&s128_esc), serde_json::to_string(&s128_esc).unwrap());
}
329
/// Escapes the same payload sliced out of buffers with 0 to 31 bytes of leading padding, so
/// the input starts at varying offsets within its allocation.
#[test]
fn test_unaligned_data() {
    const PAYLOAD: &str = "test\nstring\"with\\escapes";
    (0..32).for_each(|shift| {
        let padded = format!("{}{}", " ".repeat(shift), PAYLOAD);
        let tail = &padded[shift..];
        let actual = escape(tail);
        let expected = serde_json::to_string(tail).unwrap();
        assert_eq!(actual, expected, "Failed at offset {}", shift);
    });
}
341
/// A long input whose only escapes are the first and the last character.
#[test]
fn test_sparse_escapes() {
    // Leading quote, 500 plain bytes, trailing backslash.
    let input = format!("\"{}\\", "a".repeat(500));
    let actual = escape(&input);
    let expected = serde_json::to_string(&input).unwrap();
    assert_eq!(actual, expected);
}
351
/// Inputs in which every character needs escaping.
#[test]
fn test_dense_escapes() {
    // Nothing but quotes and backslashes.
    let quotes_and_slashes = "\"\\\"\\\"\\\"\\".repeat(50);
    assert_eq!(
        escape(&quotes_and_slashes),
        serde_json::to_string(&quotes_and_slashes).unwrap()
    );

    // Ten consecutive runs of all 32 control characters.
    let controls: String = (0..10)
        .flat_map(|_| (0u8..32).map(char::from))
        .collect();
    assert_eq!(escape(&controls), serde_json::to_string(&controls).unwrap());
}
367
/// Lengths just below and above 256 bytes, with and without an escape as the last byte.
#[test]
fn test_boundary_conditions() {
    for len in 250..260 {
        // Plain input of exactly `len` bytes.
        let plain = "a".repeat(len);
        let plain_expected = serde_json::to_string(&plain).unwrap();
        assert_eq!(escape(&plain), plain_expected);

        // Same length, but the final byte is a quote that must be escaped.
        let quoted_tail = format!("{}\"", "a".repeat(len - 1));
        let quoted_expected = serde_json::to_string(&quoted_tail).unwrap();
        assert_eq!(escape(&quoted_tail), quoted_expected);
    }
}
381
/// Each escape form on its own, then every control character checked against `serde_json`.
#[test]
fn test_all_escape_types() {
    // (single-character input, expected escaped output)
    let named: [(&str, &str); 9] = [
        ("\x00", r#""\u0000""#),
        ("\x08", r#""\b""#),
        ("\x09", r#""\t""#),
        ("\x0A", r#""\n""#),
        ("\x0C", r#""\f""#),
        ("\x0D", r#""\r""#),
        ("\x1F", r#""\u001f""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
    ];
    for &(raw, want) in named.iter() {
        assert_eq!(escape(raw), want);
    }

    // Every control character, compared against the reference implementation.
    for byte in 0u8..32 {
        let single = char::from(byte).to_string();
        let actual = escape(&single);
        let expected = serde_json::to_string(&single).unwrap();
        assert_eq!(actual, expected, "Failed for byte 0x{:02x}", byte);
    }
}
403
/// ASCII, characters that need escaping and multi-byte UTF-8 mixed in one multi-line input.
#[test]
fn test_mixed_content() {
    // Raw string: the newlines, the tab and the backslashes below are all literal input bytes.
    let text = r#"Hello "World"!
    Tab:	Here
    Emoji: 😀 Chinese: 中文
    Math: ∑∫∂ Music: 𝄞
    Escape: \" \\ \n \r \t"#;
    let actual = escape(text);
    let expected = serde_json::to_string(text).unwrap();
    assert_eq!(actual, expected);
}
414
/// Periodic inputs: no escapes, alternating quotes, and nothing but tab/newline pairs.
#[test]
fn test_repeated_patterns() {
    let units = ["abcd", "a\"b\"", "\t\n"];
    for unit in units.iter() {
        let repeated = unit.repeat(100);
        let actual = escape(&repeated);
        let expected = serde_json::to_string(&repeated).unwrap();
        assert_eq!(actual, expected);
    }
}
427
/// Escapes every RxJS TypeScript source under `node_modules` through both `escape` and
/// `escape_into`, comparing each result with `serde_json`.
#[test]
fn test_rxjs() {
    let sources: Vec<String> = glob::glob("node_modules/rxjs/src/**/*.ts")
        .unwrap()
        .map(|entry| std::fs::read_to_string(entry.unwrap()).unwrap())
        .collect();
    // Guard against the fixtures being absent, which would make the loop pass vacuously.
    assert!(!sources.is_empty());
    for source in &sources {
        let expected = serde_json::to_string(source).unwrap();
        assert_eq!(escape(source), expected);

        // Collect into a plain byte buffer and validate it as UTF-8, so that invalid output
        // fails the test instead of becoming undefined behaviour inside a `String`.
        let mut output = Vec::new();
        escape_into(source, &mut output);
        assert_eq!(String::from_utf8(output).unwrap(), expected);
    }
}
443
/// Escapes every JavaScript/TypeScript file under `fixtures/` through both `escape` and
/// `escape_into`, comparing each result with `serde_json`.
#[test]
fn test_sources() {
    // Same extensions, in the same order, as the individual glob patterns used before.
    let sources: Vec<String> = ["ts", "tsx", "js", "mjs", "cjs"]
        .iter()
        .flat_map(|ext| glob::glob(&format!("fixtures/**/*.{}", ext)).unwrap())
        .map(|entry| entry.unwrap())
        // A directory can match the pattern too; only regular files are read.
        .filter(|path| std::fs::metadata(path).unwrap().is_file())
        .map(|path| std::fs::read_to_string(&path).unwrap())
        .collect();
    // Guard against the fixtures being absent, which would make the loop pass vacuously.
    assert!(!sources.is_empty());
    for source in &sources {
        let expected = serde_json::to_string(source).unwrap();
        assert_eq!(escape(source), expected);

        // Collect into a plain byte buffer and validate it as UTF-8, so that invalid output
        // fails the test instead of becoming undefined behaviour inside a `String`.
        let mut output = Vec::new();
        escape_into(source, &mut output);
        assert_eq!(String::from_utf8(output).unwrap(), expected);
    }
}