json_escape_simd/
lib.rs

1//! Optimized SIMD routines for escaping JSON strings.
2//!
3//! ## <div class="warning">Important</div>
4//!
5//! On aarch64 NEON hosts the available register width is **128** bits, which is narrower than the lookup table this implementation prefers. As a result the SIMD path may not outperform the generic fallback, which is reflected in the benchmark numbers below.
6//!
//! On some modern macOS devices, which have a larger number of registers available, the SIMD path may outperform the generic fallback; see the [Apple M3 Max benchmark](#apple-m3-max) below.
8//!
9//! ### Note
10//!
//! The `force_aarch64_neon` feature flag forces the NEON implementation to be used on aarch64. This is useful for benchmarking.
12//!
13//! ## Benchmarks
14//!
15//! Numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion reports are summarized to make it easier to spot relative performance. "vs fastest" shows how much slower each implementation is compared to the fastest entry in the table (1.00× means fastest).
16//!
17//! ### GitHub Actions x86_64 (`ubuntu-latest`)
18//!
19//! `AVX2` enabled.
20//!
21//! **RxJS payload (~10k iterations)**
22//!
23//! | Implementation        | Median time   | vs fastest |
24//! | --------------------- | ------------- | ---------- |
25//! | **`escape simd`**     | **345.06 µs** | **1.00×**  |
26//! | `escape v_jsonescape` | 576.25 µs     | 1.67×      |
27//! | `escape generic`      | 657.94 µs     | 1.91×      |
28//! | `serde_json`          | 766.72 µs     | 2.22×      |
29//! | `json-escape`         | 782.65 µs     | 2.27×      |
30//!
31//! **Fixtures payload (~300 iterations)**
32//!
33//! | Implementation        | Median time  | vs fastest |
34//! | --------------------- | ------------ | ---------- |
35//! | **`escape simd`**     | **12.84 ms** | **1.00×**  |
36//! | `escape v_jsonescape` | 19.66 ms     | 1.53×      |
37//! | `escape generic`      | 22.53 ms     | 1.75×      |
38//! | `serde_json`          | 24.65 ms     | 1.92×      |
39//! | `json-escape`         | 26.64 ms     | 2.07×      |
40//!
41//! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
42//!
43//! Neon enabled.
44//!
45//! **RxJS payload (~10k iterations)**
46//!
47//! | Implementation        | Median time   | vs fastest |
48//! | --------------------- | ------------- | ---------- |
49//! | **`escape generic`**  | **546.89 µs** | **1.00×**  |
50//! | `escape simd`         | 589.29 µs     | 1.08×      |
51//! | `serde_json`          | 612.33 µs     | 1.12×      |
52//! | `json-escape`         | 624.66 µs     | 1.14×      |
53//! | `escape v_jsonescape` | 789.14 µs     | 1.44×      |
54//!
55//! **Fixtures payload (~300 iterations)**
56//!
57//! | Implementation        | Median time  | vs fastest |
58//! | --------------------- | ------------ | ---------- |
59//! | **`escape generic`**  | **17.81 ms** | **1.00×**  |
60//! | `serde_json`          | 19.77 ms     | 1.11×      |
61//! | `json-escape`         | 20.84 ms     | 1.17×      |
62//! | `escape simd`         | 21.04 ms     | 1.18×      |
63//! | `escape v_jsonescape` | 25.57 ms     | 1.44×      |
64//!
65//! ### GitHub Actions macOS (`macos-latest`)
66//!
67//! Apple M1 chip
68//!
69//! **RxJS payload (~10k iterations)**
70//!
71//! | Implementation        | Median time   | vs fastest |
72//! | --------------------- | ------------- | ---------- |
73//! | **`escape generic`**  | **759.07 µs** | **1.00×**  |
74//! | `escape simd`         | 764.98 µs     | 1.01×      |
75//! | `serde_json`          | 793.91 µs     | 1.05×      |
76//! | `json-escape`         | 868.21 µs     | 1.14×      |
77//! | `escape v_jsonescape` | 926.00 µs     | 1.22×      |
78//!
79//! **Fixtures payload (~300 iterations)**
80//!
81//! | Implementation        | Median time  | vs fastest |
82//! | --------------------- | ------------ | ---------- |
83//! | **`serde_json`**      | **26.41 ms** | **1.00×**  |
84//! | `escape generic`      | 26.43 ms     | 1.00×      |
85//! | `escape simd`         | 26.42 ms     | 1.00×      |
86//! | `json-escape`         | 28.94 ms     | 1.10×      |
87//! | `escape v_jsonescape` | 29.22 ms     | 1.11×      |
88//!
89//! ### Apple M3 Max
90//!
91//! **RxJS payload (~10k iterations)**
92//!
93//! | Implementation        | Median time   | vs fastest |
94//! | --------------------- | ------------- | ---------- |
95//! | **`escape simd`**     | **307.20 µs** | **1.00×**  |
96//! | `escape generic`      | 490.00 µs     | 1.60×      |
97//! | `serde_json`          | 570.35 µs     | 1.86×      |
98//! | `escape v_jsonescape` | 599.72 µs     | 1.95×      |
99//! | `json-escape`         | 644.73 µs     | 2.10×      |
100//!
101//! **Fixtures payload (~300 iterations)**
102//!
103//! | Implementation        | Median time  | vs fastest |
104//! | --------------------- | ------------ | ---------- |
105//! | **`escape generic`**  | **17.89 ms** | **1.00×**  |
106//! | **`escape simd`**     | **17.92 ms** | **1.00×**  |
107//! | `serde_json`          | 19.78 ms     | 1.11×      |
108//! | `escape v_jsonescape` | 21.09 ms     | 1.18×      |
109//! | `json-escape`         | 22.43 ms     | 1.25×      |
110
111#[cfg(target_arch = "aarch64")]
112mod aarch64;
113mod generic;
114#[cfg(target_arch = "x86_64")]
115mod x86;
116
117pub use generic::escape_generic;
118
119/// Main entry point for JSON string escaping with SIMD acceleration
120/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
121pub fn escape<S: AsRef<str>>(input: S) -> String {
122    #[cfg(target_arch = "x86_64")]
123    {
124        // Runtime CPU feature detection for x86_64
125        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
126            unsafe { return x86::escape_avx512(input) }
127        } else if is_x86_feature_detected!("avx2") {
128            unsafe { return x86::escape_avx2(input) }
129        } else if is_x86_feature_detected!("sse2") {
130            unsafe { return x86::escape_sse2(input) }
131        } else {
132            return escape_generic(input);
133        }
134    }
135
136    #[cfg(target_arch = "aarch64")]
137    {
138        #[cfg(feature = "force_aarch64_neon")]
139        {
140            return aarch64::escape_neon(input);
141        }
142        #[cfg(not(feature = "force_aarch64_neon"))]
143        {
144            // on Apple M2 and later, the `bf16` feature is available
145            // it means they have more registers and can significantly benefit from the SIMD path
146            // TODO: add support for sve2 chips with wider registers
147            // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
148            if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
149                return aarch64::escape_neon(input);
150            } else {
151                return escape_generic(input);
152            }
153        }
154    }
155
156    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
157    escape_generic(input)
158}
159
/// A plain ASCII input with nothing to escape must match `serde_json`'s output.
#[test]
fn test_escape_ascii_json_string() {
    let plain = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
    let expected = serde_json::to_string(plain).unwrap();
    let actual = escape(plain);
    assert_eq!(actual, expected);
}
165
/// Control characters, the short escapes, quote/backslash, and multi-byte UTF-8 text
/// must all be escaped exactly as `serde_json` escapes them.
#[test]
fn test_escape_json_string() {
    // Every control character U+0000..=U+001F once, in ascending order.
    let mut fixture: String = (0u8..=0x1F).map(char::from).collect();
    // Tab, backspace, tab, line feed, form feed, carriage return, quote, backslash.
    fixture.push_str("\t\x08\x09\x0A\x0C\x0D\x22\x5C");
    fixture.push_str("normal string");
    fixture.push('😊');
    fixture.push_str("中文 English 🚀 \n❓ 𝄞");

    // The original called `escape` once more and discarded the result right before this
    // assertion; that call checked nothing the assertion does not, so it is dropped.
    let input = fixture.as_str();
    assert_eq!(
        escape(input),
        serde_json::to_string(input).unwrap(),
        "fixture: {:?}",
        fixture
    );
}
191
192// Test cases for various string sizes to cover different SIMD paths
193
/// An empty input escapes to just the two surrounding quotes.
#[test]
fn test_empty_string() {
    let escaped = escape("");
    assert_eq!(escaped, r#""""#);
}
198
/// Inputs shorter than 16 bytes (the SSE register size), checked against literal outputs.
#[test]
fn test_very_small_strings() {
    // (input, expected escaped output)
    let cases = [
        ("a", r#""a""#),
        ("ab", r#""ab""#),
        ("hello", r#""hello""#),
        ("hello\n", r#""hello\n""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
        ("\t", r#""\t""#),
        ("\r\n", r#""\r\n""#),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(escape(input), expected, "input: {:?}", input);
    }
}
211
/// Inputs at (and one byte short of) the 16-byte SSE register width.
#[test]
fn test_small_strings_16_bytes() {
    // Exactly 16 bytes with nothing to escape.
    let s16 = "0123456789abcdef";
    assert_eq!(s16.len(), 16);
    assert_eq!(escape(s16), serde_json::to_string(s16).unwrap());

    // Exactly 16 bytes containing an escape. The original fixture for this case was one
    // byte short (15 bytes), so the 16-byte boundary was never exercised with an escape.
    let s16_esc = "01234567\t9abcdef";
    assert_eq!(s16_esc.len(), 16); // `\t` is a single byte
    assert_eq!(escape(s16_esc), serde_json::to_string(s16_esc).unwrap());

    // The original 15-byte fixture, kept so the just-under-the-boundary case stays covered.
    let s15_esc = "01234567\t9abcde";
    assert_eq!(s15_esc.len(), 15);
    assert_eq!(escape(s15_esc), serde_json::to_string(s15_esc).unwrap());
}
224
/// Inputs at (and one byte short of) the 32-byte AVX2 register width.
#[test]
fn test_medium_strings_32_bytes() {
    // Exactly 32 bytes with nothing to escape.
    let s32 = "0123456789abcdef0123456789abcdef";
    assert_eq!(s32.len(), 32);
    assert_eq!(escape(s32), serde_json::to_string(s32).unwrap());

    // Exactly 32 bytes with a quote at every possible position. The original fixture was
    // 31 bytes long with a single escape, so it neither hit the boundary nor varied the
    // escape position.
    for pos in 0..s32.len() {
        let mut s32_esc = s32.to_owned();
        // The base string is pure ASCII, so any one-byte range is a valid char boundary.
        s32_esc.replace_range(pos..=pos, "\"");
        assert_eq!(s32_esc.len(), 32);
        assert_eq!(
            escape(&s32_esc),
            serde_json::to_string(&s32_esc).unwrap(),
            "escape at position {}",
            pos
        );
    }

    // The original 31-byte fixture, kept so the just-under-the-boundary case stays covered.
    let s31_esc = "0123456789abcde\"0123456789abcde";
    assert_eq!(s31_esc.len(), 31);
    assert_eq!(escape(s31_esc), serde_json::to_string(s31_esc).unwrap());
}
236
/// Inputs of 128 bytes (described by the original comment as the main loop size),
/// with and without escapes.
#[test]
fn test_large_strings_128_bytes() {
    // Exactly 128 bytes with nothing to escape.
    let s128 = "0123456789abcdef".repeat(8);
    assert_eq!(s128.len(), 128);
    assert_eq!(escape(&s128), serde_json::to_string(&s128).unwrap());

    // Exactly 128 bytes with escapes spread throughout: eight 16-byte chunks that end
    // alternately in a newline and a quote. The original chunks were 15 bytes each, so the
    // fixture was only 120 bytes long.
    let s128_esc: String = (0..8)
        .map(|i| {
            if i % 2 == 0 {
                "0123456789abcde\n"
            } else {
                "0123456789abcde\""
            }
        })
        .collect();
    assert_eq!(s128_esc.len(), 128);
    assert_eq!(escape(&s128_esc), serde_json::to_string(&s128_esc).unwrap());

    // The original 120-byte fixture (eight 15-byte chunks), kept as an extra case whose
    // length is not a multiple of 16.
    let s120_esc: String = (0..8)
        .map(|i| {
            if i % 2 == 0 {
                "0123456789abcd\n"
            } else {
                "0123456789abcd\""
            }
        })
        .collect();
    assert_eq!(s120_esc.len(), 120);
    assert_eq!(escape(&s120_esc), serde_json::to_string(&s120_esc).unwrap());
}
255
/// Escapes the same payload starting at 32 different byte offsets inside a buffer.
#[test]
fn test_unaligned_data() {
    let payload = "test\nstring\"with\\escapes";
    for offset in 0..32 {
        // Prefix the payload with `offset` spaces, then slice the prefix off again so the
        // input handed to `escape` begins `offset` bytes into the allocation.
        let padded = format!("{}{}", " ".repeat(offset), payload);
        let unaligned = &padded[offset..];

        let result = escape(unaligned);
        let expected = serde_json::to_string(unaligned).unwrap();
        assert_eq!(result, expected, "Failed at offset {}", offset);
    }
}
267
/// A long run of plain characters with an escape only at the very start and very end.
#[test]
fn test_sparse_escapes() {
    // A quote, 500 'a's, then a backslash.
    let sparse = format!("\"{}\\", "a".repeat(500));
    let expected = serde_json::to_string(&sparse).unwrap();
    assert_eq!(escape(&sparse), expected);
}
277
/// Inputs in which every single character needs escaping.
#[test]
fn test_dense_escapes() {
    // Alternating quotes and backslashes.
    let quotes_and_slashes = "\"\\\"\\\"\\\"\\".repeat(50);
    assert_eq!(
        escape(&quotes_and_slashes),
        serde_json::to_string(&quotes_and_slashes).unwrap()
    );

    // The full control-character range 0x00..0x20, repeated ten times.
    let ctrl: String = (0..10)
        .flat_map(|_| (0u8..32).map(char::from))
        .collect();
    assert_eq!(escape(&ctrl), serde_json::to_string(&ctrl).unwrap());
}
293
/// Sweeps input lengths 250..260, i.e. just below and just above 256 bytes.
#[test]
fn test_boundary_conditions() {
    for size in 250..260 {
        // Nothing to escape.
        let plain = "a".repeat(size);
        assert_eq!(escape(&plain), serde_json::to_string(&plain).unwrap());

        // Same total length, but the final byte is a quote that must be escaped.
        let quote_at_end = format!("{}\"", "a".repeat(size - 1));
        assert_eq!(
            escape(&quote_at_end),
            serde_json::to_string(&quote_at_end).unwrap()
        );
    }
}
307
/// Checks each kind of escape against its literal output, then every control character
/// against `serde_json`.
#[test]
fn test_all_escape_types() {
    // (input, expected escaped output) for each escape form.
    let literal_cases = [
        ("\x00", r#""\u0000""#),
        ("\x08", r#""\b""#),
        ("\x09", r#""\t""#),
        ("\x0A", r#""\n""#),
        ("\x0C", r#""\f""#),
        ("\x0D", r#""\r""#),
        ("\x1F", r#""\u001f""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
    ];
    for &(input, expected) in literal_cases.iter() {
        assert_eq!(escape(input), expected, "input: {:?}", input);
    }

    // Every control character as a one-character string.
    for byte in 0u8..32 {
        let single = char::from(byte).to_string();
        let result = escape(&single);
        let expected = serde_json::to_string(&single).unwrap();
        assert_eq!(result, expected, "Failed for byte 0x{:02x}", byte);
    }
}
329
/// A multi-line sample mixing ASCII, characters that need escaping, and multi-byte UTF-8.
#[test]
fn test_mixed_content() {
    // Raw string: the quotes, line breaks, the tab after "Tab:", and the backslashes on the
    // last line are all literal characters of the input.
    let sample = r#"Hello "World"!
    Tab:	Here
    Emoji: 😀 Chinese: 中文
    Math: ∑∫∂ Music: 𝄞
    Escape: \" \\ \n \r \t"#;
    let expected = serde_json::to_string(sample).unwrap();
    assert_eq!(escape(sample), expected);
}
340
/// Short units repeated 100 times: patterns that might benefit from or confuse SIMD code.
#[test]
fn test_repeated_patterns() {
    // No escapes, escapes interleaved with plain bytes, and escapes only.
    let units = ["abcd", "a\"b\"", "\t\n"];
    for unit in units.iter() {
        let repeated = unit.repeat(100);
        assert_eq!(escape(&repeated), serde_json::to_string(&repeated).unwrap());
    }
}
353
/// Escapes every TypeScript file matched under `node_modules/rxjs/src` and compares the
/// result with `serde_json`. Panics if a path or file cannot be read, or if nothing matches.
#[test]
fn test_rxjs() {
    let sources: Vec<String> = glob::glob("node_modules/rxjs/src/**/*.ts")
        .unwrap()
        .map(|entry| std::fs::read_to_string(entry.unwrap()).unwrap())
        .collect();
    // Guard against the test silently passing when the package is not present.
    assert!(!sources.is_empty());

    for source in &sources {
        assert_eq!(escape(source), serde_json::to_string(source).unwrap());
    }
}
366
/// Escapes every `.ts`, `.tsx`, `.js`, `.mjs` and `.cjs` file matched under `fixtures/` and
/// compares the result with `serde_json`. Panics if nothing matches.
#[test]
fn test_sources() {
    // Visited in this order, matching the order the glob results were chained originally.
    let patterns = [
        "fixtures/**/*.ts",
        "fixtures/**/*.tsx",
        "fixtures/**/*.js",
        "fixtures/**/*.mjs",
        "fixtures/**/*.cjs",
    ];

    let sources: Vec<String> = patterns
        .iter()
        .flat_map(|pattern| glob::glob(pattern).unwrap())
        .map(|entry| entry.unwrap())
        // Only regular files are read; any other kind of match is skipped.
        .filter(|path| std::fs::metadata(path).unwrap().is_file())
        .map(|path| std::fs::read_to_string(&path).unwrap())
        .collect();
    // Guard against the test silently passing when the fixtures are not present.
    assert!(!sources.is_empty());

    for source in &sources {
        assert_eq!(escape(source), serde_json::to_string(source).unwrap());
    }
}