json_escape_simd/lib.rs
1//! Optimized SIMD routines for escaping JSON strings.
2//!
3//! ## <div class="warning">Important</div>
4//!
5//! On aarch64 NEON hosts the available register width is **128** bits, which is narrower than the lookup table this implementation prefers. As a result the SIMD path may not outperform the generic fallback, which is reflected in the benchmark numbers below.
6//!
//! On some modern macOS devices with more registers available, the SIMD path can outperform the generic fallback; see the [M3 Max benchmark](#apple-m3-max) below.
8//!
9//! ### Note
10//!
//! The `force_aarch64_neon` feature flag forces the NEON implementation to be used on aarch64, skipping the runtime CPU check. This is useful for benchmarking.
12//!
13//! ## Benchmarks
14//!
15//! Numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion reports are summarized to make it easier to spot relative performance. "vs fastest" shows how much slower each implementation is compared to the fastest entry in the table (1.00× means fastest).
16//!
17//! ### GitHub Actions x86_64 (`ubuntu-latest`)
18//!
19//! `AVX2` enabled.
20//!
21//! **RxJS payload (~10k iterations)**
22//!
23//! | Implementation | Median time | vs fastest |
24//! | --------------------- | ------------- | ---------- |
25//! | **`escape simd`** | **341.18 µs** | **1.00×** |
26//! | `escape v_jsonescape` | 555.47 µs | 1.63× |
27//! | `escape generic` | 656.85 µs | 1.93× |
28//! | `serde_json` | 744.75 µs | 2.18× |
29//! | `json-escape` | 777.15 µs | 2.28× |
30//!
31//! **Fixtures payload (~300 iterations)**
32//!
33//! | Implementation | Median time | vs fastest |
34//! | --------------------- | ------------ | ---------- |
35//! | **`escape simd`** | **12.67 ms** | **1.00×** |
36//! | `escape v_jsonescape` | 20.58 ms | 1.62× |
37//! | `escape generic` | 22.57 ms | 1.78× |
38//! | `serde_json` | 24.52 ms | 1.94× |
39//! | `json-escape` | 26.97 ms | 2.13× |
40//!
41//! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
42//!
43//! Neon enabled.
44//!
45//! **RxJS payload (~10k iterations)**
46//!
47//! | Implementation | Median time | vs fastest |
48//! | --------------------- | ------------- | ---------- |
49//! | **`escape generic`** | **546.89 µs** | **1.00×** |
50//! | `escape simd` | 589.29 µs | 1.08× |
51//! | `serde_json` | 612.33 µs | 1.12× |
52//! | `json-escape` | 624.66 µs | 1.14× |
53//! | `escape v_jsonescape` | 789.14 µs | 1.44× |
54//!
55//! **Fixtures payload (~300 iterations)**
56//!
57//! | Implementation | Median time | vs fastest |
58//! | --------------------- | ------------ | ---------- |
59//! | **`escape generic`** | **17.81 ms** | **1.00×** |
60//! | `serde_json` | 19.77 ms | 1.11× |
61//! | `json-escape` | 20.84 ms | 1.17× |
62//! | `escape simd` | 21.04 ms | 1.18× |
63//! | `escape v_jsonescape` | 25.57 ms | 1.44× |
64//!
65//! ### GitHub Actions macOS (`macos-latest`)
66//!
67//! Apple M1 chip
68//!
69//! **RxJS payload (~10k iterations)**
70//!
71//! | Implementation | Median time | vs fastest |
72//! | --------------------- | ------------- | ---------- |
73//! | **`escape generic`** | **759.07 µs** | **1.00×** |
74//! | `escape simd` | 764.98 µs | 1.01× |
75//! | `serde_json` | 793.91 µs | 1.05× |
76//! | `json-escape` | 868.21 µs | 1.14× |
77//! | `escape v_jsonescape` | 926.00 µs | 1.22× |
78//!
79//! **Fixtures payload (~300 iterations)**
80//!
81//! | Implementation | Median time | vs fastest |
82//! | --------------------- | ------------ | ---------- |
83//! | **`serde_json`** | **26.41 ms** | **1.00×** |
84//! | `escape generic` | 26.43 ms | 1.00× |
85//! | `escape simd` | 26.42 ms | 1.00× |
86//! | `json-escape` | 28.94 ms | 1.10× |
87//! | `escape v_jsonescape` | 29.22 ms | 1.11× |
88//!
89//! ### Apple M3 Max
90//!
91//! **RxJS payload (~10k iterations)**
92//!
93//! | Implementation | Median time | vs fastest |
94//! | --------------------- | ------------- | ---------- |
95//! | **`escape simd`** | **307.20 µs** | **1.00×** |
96//! | `escape generic` | 490.00 µs | 1.60× |
97//! | `serde_json` | 570.35 µs | 1.86× |
98//! | `escape v_jsonescape` | 599.72 µs | 1.95× |
99//! | `json-escape` | 644.73 µs | 2.10× |
100//!
101//! **Fixtures payload (~300 iterations)**
102//!
103//! | Implementation | Median time | vs fastest |
104//! | --------------------- | ------------ | ---------- |
105//! | **`escape generic`** | **17.89 ms** | **1.00×** |
106//! | **`escape simd`** | **17.92 ms** | **1.00×** |
107//! | `serde_json` | 19.78 ms | 1.11× |
108//! | `escape v_jsonescape` | 21.09 ms | 1.18× |
109//! | `json-escape` | 22.43 ms | 1.25× |
110
111#[cfg(target_arch = "aarch64")]
112mod aarch64;
113mod generic;
114#[cfg(target_arch = "x86_64")]
115mod x86;
116
117pub use generic::escape_generic;
118
119/// Main entry point for JSON string escaping with SIMD acceleration
120/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
121pub fn escape<S: AsRef<str>>(input: S) -> String {
122 #[cfg(target_arch = "x86_64")]
123 {
124 use generic::escape_inner;
125
126 let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
127 result.push(b'"');
128 let s = input.as_ref();
129 let bytes = s.as_bytes();
130 // Runtime CPU feature detection for x86_64
131 if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
132 unsafe { x86::escape_avx512(bytes, &mut result) }
133 } else if is_x86_feature_detected!("avx2") {
134 unsafe { x86::escape_avx2(bytes, &mut result) }
135 } else if is_x86_feature_detected!("sse2") {
136 unsafe { x86::escape_sse2(bytes, &mut result) }
137 } else {
138 escape_inner(bytes, &mut result);
139 }
140 result.push(b'"');
141 // SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
142 unsafe { String::from_utf8_unchecked(result) }
143 }
144
145 #[cfg(target_arch = "aarch64")]
146 {
147 #[cfg(feature = "force_aarch64_neon")]
148 {
149 return aarch64::escape_neon(input);
150 }
151 #[cfg(not(feature = "force_aarch64_neon"))]
152 {
153 // on Apple M2 and later, the `bf16` feature is available
154 // it means they have more registers and can significantly benefit from the SIMD path
155 // TODO: add support for sve2 chips with wider registers
156 // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
157 if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
158 return aarch64::escape_neon(input);
159 } else {
160 return escape_generic(input);
161 }
162 }
163 }
164
165 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
166 escape_generic(input)
167}
168
/// Plain ASCII input must produce the same output as `serde_json`.
#[test]
fn test_escape_ascii_json_string() {
    let input = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
    let escaped = escape(input);
    let reference = serde_json::to_string(input).unwrap();
    assert_eq!(escaped, reference);
}
174
/// Escapes a fixture containing every control character, the named escapes,
/// a quote, a backslash and multi-byte UTF-8, and compares with `serde_json`.
#[test]
fn test_escape_json_string() {
    // All control characters U+0000..=U+001F, in order.
    let mut fixture: String = (0u8..=0x1F).map(char::from).collect();
    // The named escapes once more, followed by `"` (0x22) and `\` (0x5C).
    fixture.push_str("\t\x08\x09\x0A\x0C\x0D\x22\x5C");
    fixture.push_str("normal string");
    fixture.push('😊');
    fixture.push_str("中文 English 🚀 \n❓ 𝄞");
    // `escape` is called once here: the earlier extra call discarded its result
    // and checked nothing.
    assert_eq!(
        escape(fixture.as_str()),
        serde_json::to_string(fixture.as_str()).unwrap(),
        "fixture: {:?}",
        fixture
    );
}
200
201// Test cases for various string sizes to cover different SIMD paths
202
/// An empty input must yield just the two surrounding quotes.
#[test]
fn test_empty_string() {
    let escaped = escape("");
    assert_eq!(escaped, r#""""#);
}
207
/// Inputs shorter than 16 bytes (SSE register size), as (input, expected) pairs.
#[test]
fn test_very_small_strings() {
    let cases: [(&str, &str); 8] = [
        ("a", r#""a""#),
        ("ab", r#""ab""#),
        ("hello", r#""hello""#),
        ("hello\n", r#""hello\n""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
        ("\t", r#""\t""#),
        ("\r\n", r#""\r\n""#),
    ];
    for &(raw, quoted) in cases.iter() {
        assert_eq!(escape(raw), quoted);
    }
}
220
/// Inputs of exactly 16 bytes, the SSE register boundary, with and without an escape.
#[test]
fn test_small_strings_16_bytes() {
    // Exactly 16 bytes - SSE register boundary
    let s16 = "0123456789abcdef";
    assert_eq!(s16.len(), 16);
    assert_eq!(escape(s16), serde_json::to_string(s16).unwrap());

    // Exactly 16 bytes with an escape inside. The previous fixture was only
    // 15 bytes long, so it never reached the register boundary.
    let s16_esc = "01234567\t9abcdef";
    assert_eq!(s16_esc.len(), 16); // \t is 1 byte
    assert_eq!(escape(s16_esc), serde_json::to_string(s16_esc).unwrap());
}
233
/// Inputs of exactly 32 bytes, the AVX2 register boundary, with and without an escape.
#[test]
fn test_medium_strings_32_bytes() {
    // Exactly 32 bytes - AVX2 register boundary
    let s32 = "0123456789abcdef0123456789abcdef";
    assert_eq!(s32.len(), 32);
    assert_eq!(escape(s32), serde_json::to_string(s32).unwrap());

    // 32 bytes with an escape at each position in turn. The previous fixture
    // was 31 bytes long and had a single fixed escape position.
    for pos in 0..s32.len() {
        let mut s32_esc = String::from(s32);
        // Every byte of `s32` is ASCII, so a one-byte range is always a valid char range.
        s32_esc.replace_range(pos..=pos, "\"");
        assert_eq!(s32_esc.len(), 32);
        assert_eq!(
            escape(&s32_esc),
            serde_json::to_string(&s32_esc).unwrap(),
            "Failed with escape at position {}",
            pos
        );
    }
}
245
/// Inputs of exactly 128 bytes (main loop size), with and without escapes.
#[test]
fn test_large_strings_128_bytes() {
    // Exactly 128 bytes - main loop size
    let s128 = "0123456789abcdef".repeat(8);
    assert_eq!(s128.len(), 128);
    assert_eq!(escape(&s128), serde_json::to_string(&s128).unwrap());

    // 128 bytes with escapes spread throughout: eight 16-byte chunks ending
    // alternately in `\n` and `"`. The previous chunks were 15 bytes each,
    // giving only 120 bytes in total.
    let mut s128_esc = String::new();
    for i in 0..8 {
        if i % 2 == 0 {
            s128_esc.push_str("0123456789abcde\n");
        } else {
            s128_esc.push_str("0123456789abcde\"");
        }
    }
    assert_eq!(s128_esc.len(), 128);
    assert_eq!(escape(&s128_esc), serde_json::to_string(&s128_esc).unwrap());
}
264
/// Escapes the same payload taken as a slice starting 0..32 bytes into its buffer.
#[test]
fn test_unaligned_data() {
    let payload = "test\nstring\"with\\escapes";
    for offset in 0..32 {
        // Prefix the payload with `offset` spaces, then slice the spaces off again
        // so the input begins `offset` bytes into the allocation.
        let mut buffer = " ".repeat(offset);
        buffer.push_str(payload);
        let unaligned = &buffer[offset..];

        let actual = escape(unaligned);
        let reference = serde_json::to_string(unaligned).unwrap();
        assert_eq!(actual, reference, "Failed at offset {}", offset);
    }
}
276
/// A long run of plain bytes with an escape only at the very start and very end.
#[test]
fn test_sparse_escapes() {
    // `"` + 500 × `a` + `\`
    let input = format!("\"{}\\", "a".repeat(500));
    assert_eq!(escape(&input), serde_json::to_string(&input).unwrap());
}
286
/// Inputs in which nearly every byte needs escaping.
#[test]
fn test_dense_escapes() {
    // Alternating quotes and backslashes, repeated.
    let quotes_and_slashes = "\"\\\"\\\"\\\"\\".repeat(50);
    assert_eq!(
        escape(&quotes_and_slashes),
        serde_json::to_string(&quotes_and_slashes).unwrap()
    );

    // Ten passes over every control character U+0000..U+001F.
    let controls: String = (0..10)
        .flat_map(|_| (0u8..32).map(char::from))
        .collect();
    assert_eq!(escape(&controls), serde_json::to_string(&controls).unwrap());
}
302
/// Lengths on either side of 256 bytes (common cache line multiple),
/// with and without a trailing escape.
#[test]
fn test_boundary_conditions() {
    for len in 250..260 {
        let plain = "a".repeat(len);
        assert_eq!(escape(&plain), serde_json::to_string(&plain).unwrap());

        // Same total length, but the last byte is a quote that must be escaped.
        let quoted_tail = format!("{}\"", "a".repeat(len - 1));
        assert_eq!(
            escape(&quoted_tail),
            serde_json::to_string(&quoted_tail).unwrap()
        );
    }
}
316
/// Checks each escape form on its own, then every control character against `serde_json`.
#[test]
fn test_all_escape_types() {
    // One (input, expected) pair per escape type.
    let individual: [(&str, &str); 9] = [
        ("\x00", r#""\u0000""#),
        ("\x08", r#""\b""#),
        ("\x09", r#""\t""#),
        ("\x0A", r#""\n""#),
        ("\x0C", r#""\f""#),
        ("\x0D", r#""\r""#),
        ("\x1F", r#""\u001f""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
    ];
    for &(raw, quoted) in individual.iter() {
        assert_eq!(escape(raw), quoted);
    }

    // Every control character as a single-character string.
    for byte in 0u8..32 {
        let single = char::from(byte).to_string();
        let actual = escape(&single);
        let reference = serde_json::to_string(&single).unwrap();
        assert_eq!(actual, reference, "Failed for byte 0x{:02x}", byte);
    }
}
338
/// Escapes a multi-line fixture mixing ASCII, literal quotes and backslashes,
/// and multi-byte UTF-8, and compares the result with `serde_json`.
#[test]
fn test_mixed_content() {
    // Mix of ASCII, escapes, and multi-byte UTF-8
    // The fixture is a raw string: the `\"`, `\\`, `\n`, `\r`, `\t` on its last line
    // are literal backslash sequences, and the line breaks inside it are real newlines.
    let mixed = r#"Hello "World"!
 Tab: Here
 Emoji: 😀 Chinese: 中文
 Math: ∑∫∂ Music: 𝄞
 Escape: \" \\ \n \r \t"#;
    assert_eq!(escape(mixed), serde_json::to_string(mixed).unwrap());
}
349
/// Short units repeated 100 times: patterns that might benefit from or confuse SIMD operations.
#[test]
fn test_repeated_patterns() {
    // No escapes, quotes interleaved with letters, and escapes only.
    let units = ["abcd", "a\"b\"", "\t\n"];
    for unit in units.iter() {
        let repeated = unit.repeat(100);
        assert_eq!(escape(&repeated), serde_json::to_string(&repeated).unwrap());
    }
}
362
/// Escapes every TypeScript source matched under `node_modules/rxjs/src` and
/// compares each result with `serde_json`. Fails if no file is matched.
#[test]
fn test_rxjs() {
    let sources: Vec<String> = glob::glob("node_modules/rxjs/src/**/*.ts")
        .unwrap()
        .map(|entry| std::fs::read_to_string(entry.unwrap()).unwrap())
        .collect();
    // An empty match would make the comparisons below pass without checking anything.
    assert!(!sources.is_empty());
    for text in sources.iter() {
        assert_eq!(escape(text), serde_json::to_string(text).unwrap());
    }
}
375
/// Escapes every `.ts`, `.tsx`, `.js`, `.mjs` and `.cjs` file matched under `fixtures/`
/// and compares each result with `serde_json`. Fails if no file is matched.
#[test]
fn test_sources() {
    // Patterns are visited in this order: ts, tsx, js, mjs, cjs.
    let patterns = [
        "fixtures/**/*.ts",
        "fixtures/**/*.tsx",
        "fixtures/**/*.js",
        "fixtures/**/*.mjs",
        "fixtures/**/*.cjs",
    ];
    let sources: Vec<String> = patterns
        .iter()
        .flat_map(|pattern| glob::glob(pattern).unwrap())
        .map(|entry| entry.unwrap())
        // Keep only entries whose metadata reports a regular file.
        .filter(|path| std::fs::metadata(path).unwrap().is_file())
        .map(|path| std::fs::read_to_string(&path).unwrap())
        .collect();
    // An empty match would make the comparisons below pass without checking anything.
    assert!(!sources.is_empty());
    for text in sources.iter() {
        assert_eq!(escape(text), serde_json::to_string(text).unwrap());
    }
}