json_escape_simd/
lib.rs

1//! Optimized SIMD routines for escaping JSON strings.
2//!
3//! ## <div class="warning">Important</div>
4//!
5//! On aarch64 NEON hosts the available register width is **128** bits, which is narrower than the lookup table this implementation prefers. As a result the SIMD path may not outperform the generic fallback, which is reflected in the benchmark numbers below.
6//!
//! On some modern macOS devices with more registers, the SIMD path may outperform the generic fallback; see the [Apple M3 Max benchmark](#apple-m3-max) below.
8//!
9//! ### Note
10//!
11//! The `force_aarch64_neon` feature flag can be used to force use of the neon implementation on aarch64. This is useful for the benchmark.
12//!
13//! ## Benchmarks
14//!
//! Unless a section says otherwise, the numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion reports are summarized to make relative performance easier to spot. The "vs fastest" column shows how many times slower each implementation is than the fastest entry in its table (1.00× means fastest).
16//!
17//! ### GitHub Actions x86_64 (`ubuntu-latest`)
18//!
19//! `AVX2` enabled.
20//!
21//! **RxJS payload (~10k iterations)**
22//!
23//! | Implementation        | Median time   | vs fastest |
24//! | --------------------- | ------------- | ---------- |
25//! | **`escape simd`**     | **341.18 µs** | **1.00×**  |
26//! | `escape v_jsonescape` | 555.47 µs     | 1.63×      |
27//! | `escape generic`      | 656.85 µs     | 1.93×      |
28//! | `serde_json`          | 744.75 µs     | 2.18×      |
29//! | `json-escape`         | 777.15 µs     | 2.28×      |
30//!
31//! **Fixtures payload (~300 iterations)**
32//!
33//! | Implementation        | Median time  | vs fastest |
34//! | --------------------- | ------------ | ---------- |
35//! | **`escape simd`**     | **12.67 ms** | **1.00×**  |
36//! | `escape v_jsonescape` | 20.58 ms     | 1.62×      |
37//! | `escape generic`      | 22.57 ms     | 1.78×      |
38//! | `serde_json`          | 24.52 ms     | 1.94×      |
39//! | `json-escape`         | 26.97 ms     | 2.13×      |
40//!
41//! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
42//!
43//! Neon enabled.
44//!
45//! **RxJS payload (~10k iterations)**
46//!
47//! | Implementation        | Median time   | vs fastest |
48//! | --------------------- | ------------- | ---------- |
49//! | **`escape generic`**  | **546.89 µs** | **1.00×**  |
50//! | `escape simd`         | 589.29 µs     | 1.08×      |
51//! | `serde_json`          | 612.33 µs     | 1.12×      |
52//! | `json-escape`         | 624.66 µs     | 1.14×      |
53//! | `escape v_jsonescape` | 789.14 µs     | 1.44×      |
54//!
55//! **Fixtures payload (~300 iterations)**
56//!
57//! | Implementation        | Median time  | vs fastest |
58//! | --------------------- | ------------ | ---------- |
59//! | **`escape generic`**  | **17.81 ms** | **1.00×**  |
60//! | `serde_json`          | 19.77 ms     | 1.11×      |
61//! | `json-escape`         | 20.84 ms     | 1.17×      |
62//! | `escape simd`         | 21.04 ms     | 1.18×      |
63//! | `escape v_jsonescape` | 25.57 ms     | 1.44×      |
64//!
65//! ### GitHub Actions macOS (`macos-latest`)
66//!
67//! Apple M1 chip
68//!
69//! **RxJS payload (~10k iterations)**
70//!
71//! | Implementation        | Median time   | vs fastest |
72//! | --------------------- | ------------- | ---------- |
73//! | **`escape generic`**  | **759.07 µs** | **1.00×**  |
74//! | `escape simd`         | 764.98 µs     | 1.01×      |
75//! | `serde_json`          | 793.91 µs     | 1.05×      |
76//! | `json-escape`         | 868.21 µs     | 1.14×      |
77//! | `escape v_jsonescape` | 926.00 µs     | 1.22×      |
78//!
79//! **Fixtures payload (~300 iterations)**
80//!
81//! | Implementation        | Median time  | vs fastest |
82//! | --------------------- | ------------ | ---------- |
83//! | **`serde_json`**      | **26.41 ms** | **1.00×**  |
84//! | `escape generic`      | 26.43 ms     | 1.00×      |
85//! | `escape simd`         | 26.42 ms     | 1.00×      |
86//! | `json-escape`         | 28.94 ms     | 1.10×      |
87//! | `escape v_jsonescape` | 29.22 ms     | 1.11×      |
88//!
89//! ### Apple M3 Max
90//!
91//! **RxJS payload (~10k iterations)**
92//!
93//! | Implementation        | Median time   | vs fastest |
94//! | --------------------- | ------------- | ---------- |
95//! | **`escape simd`**     | **307.20 µs** | **1.00×**  |
96//! | `escape generic`      | 490.00 µs     | 1.60×      |
97//! | `serde_json`          | 570.35 µs     | 1.86×      |
98//! | `escape v_jsonescape` | 599.72 µs     | 1.95×      |
99//! | `json-escape`         | 644.73 µs     | 2.10×      |
100//!
101//! **Fixtures payload (~300 iterations)**
102//!
103//! | Implementation        | Median time  | vs fastest |
104//! | --------------------- | ------------ | ---------- |
105//! | **`escape generic`**  | **17.89 ms** | **1.00×**  |
106//! | **`escape simd`**     | **17.92 ms** | **1.00×**  |
107//! | `serde_json`          | 19.78 ms     | 1.11×      |
108//! | `escape v_jsonescape` | 21.09 ms     | 1.18×      |
109//! | `json-escape`         | 22.43 ms     | 1.25×      |
110
111#[cfg(target_arch = "aarch64")]
112mod aarch64;
113mod generic;
114#[cfg(target_arch = "x86_64")]
115mod x86;
116
117pub use generic::escape_generic;
118
119/// Main entry point for JSON string escaping with SIMD acceleration
120/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
121pub fn escape<S: AsRef<str>>(input: S) -> String {
122    #[cfg(target_arch = "x86_64")]
123    {
124        use generic::escape_inner;
125
126        let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
127        result.push(b'"');
128        let s = input.as_ref();
129        let bytes = s.as_bytes();
130        let len = bytes.len();
131        // Runtime CPU feature detection for x86_64
132        if is_x86_feature_detected!("avx512f")
133            && is_x86_feature_detected!("avx512bw")
134            && len >= x86::LOOP_SIZE_AVX512
135        {
136            unsafe { x86::escape_avx512(bytes, &mut result) }
137        } else if is_x86_feature_detected!("avx2") && len >= x86::LOOP_SIZE_AVX2 {
138            unsafe { x86::escape_avx2(bytes, &mut result) }
139        } else if is_x86_feature_detected!("sse2")
140            && /* if len < 128, no need to use simd */
141            len >= x86::LOOP_SIZE_AVX2
142        {
143            unsafe { x86::escape_sse2(bytes, &mut result) }
144        } else {
145            escape_inner(bytes, &mut result);
146        }
147        result.push(b'"');
148        // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
149        unsafe { String::from_utf8_unchecked(result) }
150    }
151
152    #[cfg(target_arch = "aarch64")]
153    {
154        #[cfg(feature = "force_aarch64_neon")]
155        {
156            return aarch64::escape_neon(input);
157        }
158        #[cfg(not(feature = "force_aarch64_neon"))]
159        {
160            // on Apple M2 and later, the `bf16` feature is available
161            // it means they have more registers and can significantly benefit from the SIMD path
162            // TODO: add support for sve2 chips with wider registers
163            // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
164            if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
165                return aarch64::escape_neon(input);
166            } else {
167                return escape_generic(input);
168            }
169        }
170    }
171
172    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
173    escape_generic(input)
174}
175
/// Plain ASCII input with nothing to escape must match the `serde_json` encoding.
#[test]
fn test_escape_ascii_json_string() {
    let plain_ascii = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
    let escaped = escape(plain_ascii);
    let reference = serde_json::to_string(plain_ascii).unwrap();
    assert_eq!(escaped, reference);
}
181
/// Escapes a fixture mixing every C0 control character, the characters with short escapes,
/// plain ASCII and multi-byte UTF-8, and compares the result with `serde_json`.
#[test]
fn test_escape_json_string() {
    // All control characters U+0000..=U+001F, in order.
    let mut fixture: String = (0u8..=0x1F).map(char::from).collect();
    // Characters with dedicated escapes (`\t` appears twice, once by name and once as `\x09`),
    // followed by the double quote and the backslash.
    fixture.extend(['\t', '\x08', '\x09', '\x0A', '\x0C', '\x0D', '\x22', '\x5C']);
    fixture.push_str("normal string");
    fixture.push('😊');
    fixture.push_str("中文 English 🚀 \n❓ 𝄞");
    // The fixture is escaped exactly once, inside the assertion.
    assert_eq!(
        escape(fixture.as_str()),
        serde_json::to_string(fixture.as_str()).unwrap(),
        "fixture: {:?}",
        fixture
    );
}
207
208// Test cases for various string sizes to cover different SIMD paths
209
/// An empty input yields just the pair of surrounding quotes.
#[test]
fn test_empty_string() {
    let escaped = escape("");
    assert_eq!(escaped, r#""""#);
}
214
/// Inputs shorter than 16 bytes (SSE register size), with and without escapes.
#[test]
fn test_very_small_strings() {
    // (input, expected JSON literal)
    let cases = [
        ("a", r#""a""#),
        ("ab", r#""ab""#),
        ("hello", r#""hello""#),
        ("hello\n", r#""hello\n""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
        ("\t", r#""\t""#),
        ("\r\n", r#""\r\n""#),
    ];
    for (input, expected) in cases.iter() {
        assert_eq!(escape(input), *expected);
    }
}
227
/// Inputs that are exactly 16 bytes long, with and without a character that needs escaping.
#[test]
fn test_small_strings_16_bytes() {
    // Exactly 16 bytes - SSE register boundary
    let s16 = "0123456789abcdef";
    assert_eq!(s16.len(), 16);
    assert_eq!(escape(s16), serde_json::to_string(s16).unwrap());

    // Exactly 16 input bytes, one of which must be escaped. The length assertion keeps the
    // fixture on the 16-byte boundary.
    let s16_esc = "01234567\t9abcdef";
    assert_eq!(s16_esc.len(), 16); // \t is 1 byte
    assert_eq!(escape(s16_esc), serde_json::to_string(s16_esc).unwrap());
}
240
/// Inputs that are exactly 32 bytes long, with and without a character that needs escaping.
#[test]
fn test_medium_strings_32_bytes() {
    // Exactly 32 bytes - AVX2 register boundary
    let s32 = "0123456789abcdef0123456789abcdef";
    assert_eq!(s32.len(), 32);
    assert_eq!(escape(s32), serde_json::to_string(s32).unwrap());

    // 32 bytes with an escape at each possible position: swap one ASCII byte for a double
    // quote so the input length stays at exactly 32 bytes.
    for pos in 0..s32.len() {
        let mut s32_esc = String::from(s32);
        s32_esc.replace_range(pos..=pos, "\"");
        assert_eq!(s32_esc.len(), 32);
        assert_eq!(
            escape(&s32_esc),
            serde_json::to_string(&s32_esc).unwrap(),
            "Failed with escape at position {}",
            pos
        );
    }
}
252
/// Inputs that are exactly 128 bytes long, with and without characters that need escaping.
#[test]
fn test_large_strings_128_bytes() {
    // Exactly 128 bytes - main loop size
    let s128 = "0123456789abcdef".repeat(8);
    assert_eq!(s128.len(), 128);
    assert_eq!(escape(&s128), serde_json::to_string(&s128).unwrap());

    // 128 bytes with escapes spread throughout: eight 16-byte segments, each ending in a
    // character that must be escaped (alternating newline and double quote).
    let mut s128_esc = String::with_capacity(128);
    for i in 0..8 {
        if i % 2 == 0 {
            s128_esc.push_str("0123456789abcde\n");
        } else {
            s128_esc.push_str("0123456789abcde\"");
        }
    }
    assert_eq!(s128_esc.len(), 128);
    assert_eq!(escape(&s128_esc), serde_json::to_string(&s128_esc).unwrap());
}
271
/// Escapes the same payload taken as a slice starting at offsets 0..32 into a larger buffer.
#[test]
fn test_unaligned_data() {
    // Shift the payload by a growing run of spaces, then slice the spaces away again so the
    // escaped slice begins at a different offset inside its backing buffer each time.
    for offset in 0..32 {
        let mut buffer = " ".repeat(offset);
        buffer.push_str("test\nstring\"with\\escapes");
        let payload = &buffer[offset..];
        let actual = escape(payload);
        let reference = serde_json::to_string(payload).unwrap();
        assert_eq!(actual, reference, "Failed at offset {}", offset);
    }
}
283
/// A long run of plain characters with a single escape at each end.
#[test]
fn test_sparse_escapes() {
    // Leading double quote, 500 plain bytes, trailing backslash.
    let input = format!("\"{}\\", "a".repeat(500));
    let reference = serde_json::to_string(&input).unwrap();
    assert_eq!(escape(&input), reference);
}
293
/// Inputs in which every character needs escaping.
#[test]
fn test_dense_escapes() {
    // Alternating double quotes and backslashes.
    let quotes_and_slashes = "\"\\\"\\\"\\\"\\".repeat(50);
    assert_eq!(
        escape(&quotes_and_slashes),
        serde_json::to_string(&quotes_and_slashes).unwrap()
    );

    // Ten consecutive runs of all control characters U+0000..U+0020.
    let one_run: String = (0u8..32).map(char::from).collect();
    let ctrl = one_run.repeat(10);
    assert_eq!(escape(&ctrl), serde_json::to_string(&ctrl).unwrap());
}
309
/// Input lengths just below and above 256 bytes, with and without a trailing escape.
#[test]
fn test_boundary_conditions() {
    // Sizes around the 256 byte boundary (common cache line multiple)
    for size in 250..260 {
        let plain = "a".repeat(size);
        assert_eq!(escape(&plain), serde_json::to_string(&plain).unwrap());

        // Same total length, but the final byte is a double quote that must be escaped.
        let quoted_tail = format!("{}\"", "a".repeat(size - 1));
        assert_eq!(
            escape(&quoted_tail),
            serde_json::to_string(&quoted_tail).unwrap()
        );
    }
}
323
/// Checks each kind of escape sequence on its own, then every control character.
#[test]
fn test_all_escape_types() {
    // (single-character input, expected JSON literal)
    let expectations = [
        ("\x00", r#""\u0000""#),
        ("\x08", r#""\b""#),
        ("\x09", r#""\t""#),
        ("\x0A", r#""\n""#),
        ("\x0C", r#""\f""#),
        ("\x0D", r#""\r""#),
        ("\x1F", r#""\u001f""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
    ];
    for (input, expected) in expectations.iter() {
        assert_eq!(escape(input), *expected);
    }

    // Every control character must agree with serde_json.
    for byte in 0u8..32 {
        let single = char::from(byte).to_string();
        let actual = escape(&single);
        let reference = serde_json::to_string(&single).unwrap();
        assert_eq!(actual, reference, "Failed for byte 0x{:02x}", byte);
    }
}
345
/// A multi-line fixture combining ASCII, characters that need escaping, and multi-byte UTF-8.
#[test]
fn test_mixed_content() {
    // Raw string: the newlines and the tab are literal characters, and the backslashes on the
    // last line are literal backslashes rather than Rust escapes.
    let fixture = r#"Hello "World"!
    Tab:	Here
    Emoji: 😀 Chinese: 中文
    Math: ∑∫∂ Music: 𝄞
    Escape: \" \\ \n \r \t"#;
    let reference = serde_json::to_string(fixture).unwrap();
    assert_eq!(escape(fixture), reference);
}
356
/// Long inputs built from short repeating units that might benefit from or confuse SIMD code.
#[test]
fn test_repeated_patterns() {
    // Units: no escapes, alternating plain/quote, and escapes only.
    let units = ["abcd", "a\"b\"", "\t\n"];
    for unit in units.iter() {
        let repeated = unit.repeat(100);
        assert_eq!(escape(&repeated), serde_json::to_string(&repeated).unwrap());
    }
}
369
/// Escapes every RxJS TypeScript source found under `node_modules` and compares each result
/// with `serde_json`. Fails if no source file is found.
#[test]
fn test_rxjs() {
    let sources: Vec<String> = glob::glob("node_modules/rxjs/src/**/*.ts")
        .unwrap()
        .map(|entry| std::fs::read_to_string(entry.unwrap()).unwrap())
        .collect();
    // An empty set means the fixtures are missing, which would make the loop below vacuous.
    assert!(!sources.is_empty());
    for source in sources {
        let reference = serde_json::to_string(&source).unwrap();
        assert_eq!(escape(&source), reference);
    }
}
382
/// Escapes every TypeScript/JavaScript file under `fixtures/` and compares each result with
/// `serde_json`. Fails if no source file is found.
#[test]
fn test_sources() {
    // Glob patterns, visited in this order.
    let patterns = [
        "fixtures/**/*.ts",
        "fixtures/**/*.tsx",
        "fixtures/**/*.js",
        "fixtures/**/*.mjs",
        "fixtures/**/*.cjs",
    ];
    let mut sources = Vec::new();
    for pattern in patterns.iter() {
        for entry in glob::glob(pattern).unwrap() {
            let path = entry.unwrap();
            // Only regular files are read; other matches are skipped.
            if std::fs::metadata(&path).unwrap().is_file() {
                sources.push(std::fs::read_to_string(&path).unwrap());
            }
        }
    }
    assert!(!sources.is_empty());
    for source in sources {
        let reference = serde_json::to_string(&source).unwrap();
        assert_eq!(escape(&source), reference);
    }
}
406}