json_escape_simd/
lib.rs

1//! Optimized SIMD routines for escaping JSON strings.
2//!
3//! ## <div class="warning">Important</div>
4//!
5//! On aarch64 NEON hosts the available register width is **128** bits, which is narrower than the lookup table this implementation prefers. As a result the SIMD path may not outperform the generic fallback, which is reflected in the benchmark numbers below.
6//!
//! On some modern macOS devices with more registers available, the SIMD path may outperform the generic fallback; see the [M3 Max benchmark](#apple-m3-max) below.
8//!
9//! ### Note
10//!
//! The `force_aarch64_neon` feature flag forces use of the NEON implementation on aarch64. This is useful for benchmarking.
12//!
13//! ## Benchmarks
14//!
15//! Numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion reports are summarized to make it easier to spot relative performance. "vs fastest" shows how much slower each implementation is compared to the fastest entry in the table (1.00× means fastest).
16//!
17//! ### GitHub Actions x86_64 (`ubuntu-latest`)
18//!
19//! `AVX2` enabled.
20//!
21//! **RxJS payload (~10k iterations)**
22//!
23//! | Implementation        | Median time   | vs fastest |
24//! | --------------------- | ------------- | ---------- |
25//! | **`escape simd`**     | **345.06 µs** | **1.00×**  |
26//! | `escape v_jsonescape` | 576.25 µs     | 1.67×      |
27//! | `escape generic`      | 657.94 µs     | 1.91×      |
28//! | `serde_json`          | 766.72 µs     | 2.22×      |
29//! | `json-escape`         | 782.65 µs     | 2.27×      |
30//!
31//! **Fixtures payload (~300 iterations)**
32//!
33//! | Implementation        | Median time  | vs fastest |
34//! | --------------------- | ------------ | ---------- |
35//! | **`escape simd`**     | **12.84 ms** | **1.00×**  |
36//! | `escape v_jsonescape` | 19.66 ms     | 1.53×      |
37//! | `escape generic`      | 22.53 ms     | 1.75×      |
38//! | `serde_json`          | 24.65 ms     | 1.92×      |
39//! | `json-escape`         | 26.64 ms     | 2.07×      |
40//!
41//! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
42//!
43//! Neon enabled.
44//!
45//! **RxJS payload (~10k iterations)**
46//!
47//! | Implementation        | Median time   | vs fastest |
48//! | --------------------- | ------------- | ---------- |
49//! | **`escape generic`**  | **546.89 µs** | **1.00×**  |
50//! | `escape simd`         | 589.29 µs     | 1.08×      |
51//! | `serde_json`          | 612.33 µs     | 1.12×      |
52//! | `json-escape`         | 624.66 µs     | 1.14×      |
53//! | `escape v_jsonescape` | 789.14 µs     | 1.44×      |
54//!
55//! **Fixtures payload (~300 iterations)**
56//!
57//! | Implementation        | Median time  | vs fastest |
58//! | --------------------- | ------------ | ---------- |
59//! | **`escape generic`**  | **17.81 ms** | **1.00×**  |
60//! | `serde_json`          | 19.77 ms     | 1.11×      |
61//! | `json-escape`         | 20.84 ms     | 1.17×      |
62//! | `escape simd`         | 21.04 ms     | 1.18×      |
63//! | `escape v_jsonescape` | 25.57 ms     | 1.44×      |
64//!
65//! ### GitHub Actions macOS (`macos-latest`)
66//!
67//! Apple M1 chip
68//!
69//! **RxJS payload (~10k iterations)**
70//!
71//! | Implementation        | Median time   | vs fastest |
72//! | --------------------- | ------------- | ---------- |
73//! | **`escape generic`**  | **759.07 µs** | **1.00×**  |
74//! | `escape simd`         | 764.98 µs     | 1.01×      |
75//! | `serde_json`          | 793.91 µs     | 1.05×      |
76//! | `json-escape`         | 868.21 µs     | 1.14×      |
77//! | `escape v_jsonescape` | 926.00 µs     | 1.22×      |
78//!
79//! **Fixtures payload (~300 iterations)**
80//!
81//! | Implementation        | Median time  | vs fastest |
82//! | --------------------- | ------------ | ---------- |
83//! | **`serde_json`**      | **26.41 ms** | **1.00×**  |
84//! | `escape generic`      | 26.43 ms     | 1.00×      |
85//! | `escape simd`         | 26.42 ms     | 1.00×      |
86//! | `json-escape`         | 28.94 ms     | 1.10×      |
87//! | `escape v_jsonescape` | 29.22 ms     | 1.11×      |
88//!
89//! ### Apple M3 Max
90//!
91//! **RxJS payload (~10k iterations)**
92//!
93//! | Implementation        | Median time   | vs fastest |
94//! | --------------------- | ------------- | ---------- |
95//! | **`escape simd`**     | **307.20 µs** | **1.00×**  |
96//! | `escape generic`      | 490.00 µs     | 1.60×      |
97//! | `serde_json`          | 570.35 µs     | 1.86×      |
98//! | `escape v_jsonescape` | 599.72 µs     | 1.95×      |
99//! | `json-escape`         | 644.73 µs     | 2.10×      |
100//!
101//! **Fixtures payload (~300 iterations)**
102//!
103//! | Implementation        | Median time  | vs fastest |
104//! | --------------------- | ------------ | ---------- |
105//! | **`escape generic`**  | **17.89 ms** | **1.00×**  |
106//! | **`escape simd`**     | **17.92 ms** | **1.00×**  |
107//! | `serde_json`          | 19.78 ms     | 1.11×      |
108//! | `escape v_jsonescape` | 21.09 ms     | 1.18×      |
109//! | `json-escape`         | 22.43 ms     | 1.25×      |
110
111#[cfg(target_arch = "x86_64")]
112mod x86;
113
114#[cfg(target_arch = "aarch64")]
115mod aarch64;
116
// Escape codes stored in `ESCAPE`. Every non-zero code is the ASCII character
// that follows the backslash in the escaped output.
const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
pub(crate) const QU: u8 = b'"'; // \x22
pub(crate) const BS: u8 = b'\\'; // \x5C
pub(crate) const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0; // byte is emitted unchanged

// Lookup table of escape sequences, indexed by input byte. A value of b'x' at
// index i means that byte i is escaped as "\x" in JSON (`UU` marks bytes that
// are written as a `\u00XX` sequence). A value of 0 means that byte i is not
// escaped.
pub(crate) const ESCAPE: [u8; 256] = {
    // Start from "nothing is escaped" and mark the exceptions.
    let mut table = [__; 256];

    // Every control character below 0x20 defaults to the `\u00XX` form ...
    let mut byte = 0;
    while byte < 0x20 {
        table[byte] = UU;
        byte += 1;
    }

    // ... except the five that have a dedicated short escape.
    table[0x08] = BB;
    table[0x09] = TT;
    table[0x0A] = NN;
    table[0x0C] = FF;
    table[0x0D] = RR;

    // The only two printable bytes that need escaping.
    table[0x22] = QU;
    table[0x5C] = BS;

    table
};
148
// Hex digit pairs for every byte value, computed at compile time so control
// character escaping needs no per-byte arithmetic. Entry `i` holds the two
// lowercase ASCII hex digits of `i` as (high nibble, low nibble).
pub(crate) const HEX_BYTES: [(u8, u8); 256] = {
    // Nibble value -> lowercase ASCII hex digit.
    const DIGITS: [u8; 16] = *b"0123456789abcdef";

    let mut table = [(0u8, 0u8); 256];
    let mut value = 0;
    // `for` loops are not usable in const evaluation, hence the manual counter.
    while value < 256 {
        table[value] = (DIGITS[value >> 4], DIGITS[value & 0xF]);
        value += 1;
    }
    table
};
172
173#[inline]
174/// Cross platform generic implementation without any platform specific instructions
175pub fn escape_generic<S: AsRef<str>>(input: S) -> String {
176    let s = input.as_ref();
177    let bytes = s.as_bytes();
178
179    // Estimate capacity - most strings don't need much escaping
180    // Add some padding for potential escapes
181    let estimated_capacity = bytes.len() + bytes.len() / 2 + 2;
182    let mut result = Vec::with_capacity(estimated_capacity);
183
184    result.push(b'"');
185
186    let mut start = 0;
187    let mut i = 0;
188
189    while i < bytes.len() {
190        let b = bytes[i];
191
192        // Use lookup table to check if escaping is needed
193        let escape_byte = ESCAPE[b as usize];
194
195        if escape_byte == 0 {
196            // No escape needed, continue scanning
197            i += 1;
198            continue;
199        }
200
201        // Copy any unescaped bytes before this position
202        if start < i {
203            result.extend_from_slice(&bytes[start..i]);
204        }
205
206        // Handle the escape
207        result.push(b'\\');
208        if escape_byte == UU {
209            // Unicode escape for control characters
210            result.extend_from_slice(b"u00");
211            let hex_digits = &HEX_BYTES[b as usize];
212            result.push(hex_digits.0);
213            result.push(hex_digits.1);
214        } else {
215            // Simple escape
216            result.push(escape_byte);
217        }
218
219        i += 1;
220        start = i;
221    }
222
223    // Copy any remaining unescaped bytes
224    if start < bytes.len() {
225        result.extend_from_slice(&bytes[start..]);
226    }
227
228    result.push(b'"');
229
230    // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
231    unsafe { String::from_utf8_unchecked(result) }
232}
233
234/// Main entry point for JSON string escaping with SIMD acceleration
235/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
236pub fn escape<S: AsRef<str>>(input: S) -> String {
237    #[cfg(target_arch = "x86_64")]
238    {
239        // Runtime CPU feature detection for x86_64
240        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
241            unsafe { return x86::escape_avx512(input) }
242        } else if is_x86_feature_detected!("avx2") {
243            unsafe { return x86::escape_avx2(input) }
244        } else if is_x86_feature_detected!("sse2") {
245            unsafe { return x86::escape_sse2(input) }
246        } else {
247            return escape_generic(input);
248        }
249    }
250
251    #[cfg(target_arch = "aarch64")]
252    {
253        #[cfg(feature = "force_aarch64_neon")]
254        {
255            return aarch64::escape_neon(input);
256        }
257        #[cfg(not(feature = "force_aarch64_neon"))]
258        {
259            // on Apple M2 and later, the `bf16` feature is available
260            // it means they have more registers and can significantly benefit from the SIMD path
261            // TODO: add support for sve2 chips with wider registers
262            // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
263            if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
264                return aarch64::escape_neon(input);
265            } else {
266                return escape_generic(input);
267            }
268        }
269    }
270
271    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
272    escape_generic(input)
273}
274
/// Plain ASCII without any escapable byte must match serde_json's output.
#[test]
fn test_escape_ascii_json_string() {
    let fixture = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
    let expected = serde_json::to_string(fixture).unwrap();
    assert_eq!(escape(fixture), expected);
}
280
/// Every control character, both printable escapes and a mix of multi-byte
/// UTF-8 must be escaped exactly like serde_json does.
#[test]
fn test_escape_json_string() {
    let mut fixture = String::new();
    // All C0 control characters, in order.
    for i in 0u8..=0x1F {
        fixture.push(i as char);
    }
    // The characters with dedicated escapes, once more back to back.
    fixture.push('\t');
    fixture.push('\x08');
    fixture.push('\x09');
    fixture.push('\x0A');
    fixture.push('\x0C');
    fixture.push('\x0D');
    fixture.push('\x22');
    fixture.push('\x5C');
    // Unescaped ASCII followed by 2-, 3- and 4-byte UTF-8 sequences.
    fixture.push_str("normal string");
    fixture.push('😊');
    fixture.push_str("中文 English 🚀 \n❓ 𝄞");
    // The original called `escape` once more here and discarded the result;
    // that redundant call has been removed.
    assert_eq!(
        escape(fixture.as_str()),
        serde_json::to_string(fixture.as_str()).unwrap(),
        "fixture: {:?}",
        fixture
    );
}
306
307// Test cases for various string sizes to cover different SIMD paths
308
/// An empty input still produces the surrounding pair of quotes.
#[test]
fn test_empty_string() {
    let escaped = escape("");
    assert_eq!(escaped, "\"\"");
}
313
/// Inputs shorter than 16 bytes (SSE register size), with and without escapes.
#[test]
fn test_very_small_strings() {
    // (input, expected JSON literal)
    let cases = [
        ("a", r#""a""#),
        ("ab", r#""ab""#),
        ("hello", r#""hello""#),
        ("hello\n", r#""hello\n""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
        ("\t", r#""\t""#),
        ("\r\n", r#""\r\n""#),
    ];
    for &(input, expected) in &cases {
        assert_eq!(escape(input), expected);
    }
}
326
/// Inputs of exactly 16 bytes, the SSE register boundary.
#[test]
fn test_small_strings_16_bytes() {
    // Exactly 16 bytes without escapes
    let s16 = "0123456789abcdef";
    assert_eq!(s16.len(), 16);
    assert_eq!(escape(s16), serde_json::to_string(s16).unwrap());

    // Exactly 16 bytes with an escape inside. The previous fixture was one byte
    // short (15 bytes), so the escaped case never reached the 16-byte boundary.
    let s16_esc = "01234567\t9abcdef";
    assert_eq!(s16_esc.len(), 16); // \t is 1 byte
    assert_eq!(escape(s16_esc), serde_json::to_string(s16_esc).unwrap());
}
339
/// Inputs of exactly 32 bytes, the AVX2 register boundary.
#[test]
fn test_medium_strings_32_bytes() {
    // Exactly 32 bytes without escapes
    let s32 = "0123456789abcdef0123456789abcdef";
    assert_eq!(s32.len(), 32);
    assert_eq!(escape(s32), serde_json::to_string(s32).unwrap());

    // Exactly 32 bytes with a quote as the last byte of the first 16-byte half.
    // The previous fixture was only 31 bytes long and its length was unchecked.
    let s32_esc = "0123456789abcde\"0123456789abcdef";
    assert_eq!(s32_esc.len(), 32);
    assert_eq!(escape(s32_esc), serde_json::to_string(s32_esc).unwrap());
}
351
/// Inputs of exactly 128 bytes, the main loop size.
#[test]
fn test_large_strings_128_bytes() {
    // Exactly 128 bytes without escapes
    let s128 = "0123456789abcdef".repeat(8);
    assert_eq!(s128.len(), 128);
    assert_eq!(escape(&s128), serde_json::to_string(&s128).unwrap());

    // Exactly 128 bytes with escapes spread throughout: eight 16-byte chunks,
    // each ending in an escaped byte that alternates between \n and ".
    // The previous chunks were 15 bytes each, giving 120 bytes instead of 128.
    let mut s128_esc = String::new();
    for i in 0..8 {
        if i % 2 == 0 {
            s128_esc.push_str("0123456789abcde\n");
        } else {
            s128_esc.push_str("0123456789abcde\"");
        }
    }
    assert_eq!(s128_esc.len(), 128);
    assert_eq!(escape(&s128_esc), serde_json::to_string(&s128_esc).unwrap());
}
370
/// The same payload, sliced out of a buffer at byte offsets 0..32 so that it
/// starts at various alignments.
#[test]
fn test_unaligned_data() {
    const PAYLOAD: &str = "test\nstring\"with\\escapes";

    for offset in 0..32 {
        // `offset` spaces of padding, then the payload; only the payload is escaped.
        let mut buffer = " ".repeat(offset);
        buffer.push_str(PAYLOAD);
        let shifted = &buffer[offset..];

        let result = escape(shifted);
        let expected = serde_json::to_string(shifted).unwrap();
        assert_eq!(result, expected, "Failed at offset {}", offset);
    }
}
382
/// Large string whose only escapes are the very first and very last byte.
#[test]
fn test_sparse_escapes() {
    // A quote, 500 plain bytes, then a backslash.
    let s = format!("\"{}\\", "a".repeat(500));
    let expected = serde_json::to_string(&s).unwrap();
    assert_eq!(escape(&s), expected);
}
392
/// Inputs in which every single byte needs escaping.
#[test]
fn test_dense_escapes() {
    // Alternating quotes and backslashes
    let quotes_and_backslashes = "\"\\\"\\\"\\\"\\".repeat(50);
    assert_eq!(
        escape(&quotes_and_backslashes),
        serde_json::to_string(&quotes_and_backslashes).unwrap()
    );

    // All control characters, ten rounds of 0x00..=0x1F
    let ctrl: String = (0..10)
        .flat_map(|_| (0u8..32).map(char::from))
        .collect();
    assert_eq!(escape(&ctrl), serde_json::to_string(&ctrl).unwrap());
}
408
/// Lengths just below and above 256 bytes, with and without a trailing escape.
#[test]
fn test_boundary_conditions() {
    for size in 250..260 {
        // `size` plain bytes
        let plain = "a".repeat(size);
        assert_eq!(escape(&plain), serde_json::to_string(&plain).unwrap());

        // Same total length, but the final byte is a quote
        let quote_at_end = format!("{}\"", "a".repeat(size - 1));
        assert_eq!(
            escape(&quote_at_end),
            serde_json::to_string(&quote_at_end).unwrap()
        );
    }
}
422
/// Each kind of escape on its own, then every control character against serde_json.
#[test]
fn test_all_escape_types() {
    // (single-character input, expected JSON literal)
    let cases = [
        ("\x00", r#""\u0000""#),
        ("\x08", r#""\b""#),
        ("\x09", r#""\t""#),
        ("\x0A", r#""\n""#),
        ("\x0C", r#""\f""#),
        ("\x0D", r#""\r""#),
        ("\x1F", r#""\u001f""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
    ];
    for &(input, expected) in &cases {
        assert_eq!(escape(input), expected);
    }

    // Every control character as a one-byte string
    for i in 0u8..32 {
        let s = char::from(i).to_string();
        let expected = serde_json::to_string(&s).unwrap();
        let result = escape(&s);
        assert_eq!(result, expected, "Failed for byte 0x{:02x}", i);
    }
}
444
/// ASCII, escapable bytes (quotes, newlines, a literal tab, backslashes) and
/// multi-byte UTF-8 mixed in one multi-line input.
#[test]
fn test_mixed_content() {
    // Raw string: the line breaks and the tab after "Tab:" are real characters,
    // while the sequences on the last line are literal backslashes.
    let mixed = r#"Hello "World"!
    Tab:	Here
    Emoji: 😀 Chinese: 中文
    Math: ∑∫∂ Music: 𝄞
    Escape: \" \\ \n \r \t"#;
    let expected = serde_json::to_string(mixed).unwrap();
    assert_eq!(escape(mixed), expected);
}
455
/// Short units repeated 100 times: patterns that might benefit from or confuse
/// SIMD operations.
#[test]
fn test_repeated_patterns() {
    // No escapes, quotes on every other byte, nothing but escapes.
    let units = ["abcd", "a\"b\"", "\t\n"];
    for unit in units.iter() {
        let repeated = unit.repeat(100);
        assert_eq!(escape(&repeated), serde_json::to_string(&repeated).unwrap());
    }
}
468
/// Escapes every TypeScript source under `node_modules/rxjs/src` and compares
/// the result with serde_json. Fails if no file is found.
#[test]
fn test_rxjs() {
    let sources: Vec<String> = glob::glob("node_modules/rxjs/src/**/*.ts")
        .unwrap()
        .map(|entry| std::fs::read_to_string(entry.unwrap()).unwrap())
        .collect();

    // Guard against silently passing when the fixture directory is missing.
    assert!(!sources.is_empty());

    for source in &sources {
        assert_eq!(escape(source), serde_json::to_string(source).unwrap());
    }
}
481
/// Escapes every `.ts`, `.tsx`, `.js`, `.mjs` and `.cjs` file under `fixtures/`
/// and compares the result with serde_json. Fails if no file is found.
#[test]
fn test_sources() {
    let patterns = [
        "fixtures/**/*.ts",
        "fixtures/**/*.tsx",
        "fixtures/**/*.js",
        "fixtures/**/*.mjs",
        "fixtures/**/*.cjs",
    ];
    // Create all glob walkers up front, then visit them in pattern order.
    let walkers: Vec<_> = patterns
        .iter()
        .map(|pattern| glob::glob(pattern).unwrap())
        .collect();

    let mut sources = Vec::new();
    for entry in walkers.into_iter().flatten() {
        let path = entry.unwrap();
        // A pattern can also match directories; only regular files are read.
        if std::fs::metadata(&path).unwrap().is_file() {
            sources.push(std::fs::read_to_string(&path).unwrap());
        }
    }

    // Guard against silently passing when the fixture directory is missing.
    assert!(!sources.is_empty());

    for source in &sources {
        assert_eq!(escape(source), serde_json::to_string(source).unwrap());
    }
}