json_escape_simd/lib.rs
1//! Optimized SIMD routines for escaping JSON strings.
2//!
3//! ## <div class="warning">Important</div>
4//!
5//! On aarch64 NEON hosts the available register width is **128** bits, which is narrower than the lookup table this implementation prefers. As a result the SIMD path may not outperform the generic fallback, which is reflected in the benchmark numbers below.
6//!
//! On some modern macOS devices with more registers available, the SIMD path may outperform the generic fallback; see the [Apple M3 Max benchmark](#apple-m3-max) below.
8//!
9//! ### Note
10//!
//! The `force_aarch64_neon` feature flag forces the NEON implementation to be used on aarch64. This is useful for benchmarking.
12//!
13//! ## Benchmarks
14//!
15//! Numbers below come from `cargo bench` runs on GitHub Actions hardware. Criterion reports are summarized to make it easier to spot relative performance. "vs fastest" shows how much slower each implementation is compared to the fastest entry in the table (1.00× means fastest).
16//!
17//! ### GitHub Actions x86_64 (`ubuntu-latest`)
18//!
19//! `AVX2` enabled.
20//!
21//! **RxJS payload (~10k iterations)**
22//!
23//! | Implementation | Median time | vs fastest |
24//! | --------------------- | ------------- | ---------- |
25//! | **`escape simd`** | **345.06 µs** | **1.00×** |
26//! | `escape v_jsonescape` | 576.25 µs | 1.67× |
27//! | `escape generic` | 657.94 µs | 1.91× |
28//! | `serde_json` | 766.72 µs | 2.22× |
29//! | `json-escape` | 782.65 µs | 2.27× |
30//!
31//! **Fixtures payload (~300 iterations)**
32//!
33//! | Implementation | Median time | vs fastest |
34//! | --------------------- | ------------ | ---------- |
35//! | **`escape simd`** | **12.84 ms** | **1.00×** |
36//! | `escape v_jsonescape` | 19.66 ms | 1.53× |
37//! | `escape generic` | 22.53 ms | 1.75× |
38//! | `serde_json` | 24.65 ms | 1.92× |
39//! | `json-escape` | 26.64 ms | 2.07× |
40//!
41//! ### GitHub Actions aarch64 (`ubuntu-24.04-arm`)
42//!
43//! Neon enabled.
44//!
45//! **RxJS payload (~10k iterations)**
46//!
47//! | Implementation | Median time | vs fastest |
48//! | --------------------- | ------------- | ---------- |
49//! | **`escape generic`** | **546.89 µs** | **1.00×** |
50//! | `escape simd` | 589.29 µs | 1.08× |
51//! | `serde_json` | 612.33 µs | 1.12× |
52//! | `json-escape` | 624.66 µs | 1.14× |
53//! | `escape v_jsonescape` | 789.14 µs | 1.44× |
54//!
55//! **Fixtures payload (~300 iterations)**
56//!
57//! | Implementation | Median time | vs fastest |
58//! | --------------------- | ------------ | ---------- |
59//! | **`escape generic`** | **17.81 ms** | **1.00×** |
60//! | `serde_json` | 19.77 ms | 1.11× |
61//! | `json-escape` | 20.84 ms | 1.17× |
62//! | `escape simd` | 21.04 ms | 1.18× |
63//! | `escape v_jsonescape` | 25.57 ms | 1.44× |
64//!
65//! ### GitHub Actions macOS (`macos-latest`)
66//!
67//! Apple M1 chip
68//!
69//! **RxJS payload (~10k iterations)**
70//!
71//! | Implementation | Median time | vs fastest |
72//! | --------------------- | ------------- | ---------- |
73//! | **`escape generic`** | **759.07 µs** | **1.00×** |
74//! | `escape simd` | 764.98 µs | 1.01× |
75//! | `serde_json` | 793.91 µs | 1.05× |
76//! | `json-escape` | 868.21 µs | 1.14× |
77//! | `escape v_jsonescape` | 926.00 µs | 1.22× |
78//!
79//! **Fixtures payload (~300 iterations)**
80//!
81//! | Implementation | Median time | vs fastest |
82//! | --------------------- | ------------ | ---------- |
83//! | **`serde_json`** | **26.41 ms** | **1.00×** |
84//! | `escape generic` | 26.43 ms | 1.00× |
85//! | `escape simd` | 26.42 ms | 1.00× |
86//! | `json-escape` | 28.94 ms | 1.10× |
87//! | `escape v_jsonescape` | 29.22 ms | 1.11× |
88//!
89//! ### Apple M3 Max
90//!
91//! **RxJS payload (~10k iterations)**
92//!
93//! | Implementation | Median time | vs fastest |
94//! | --------------------- | ------------- | ---------- |
95//! | **`escape simd`** | **307.20 µs** | **1.00×** |
96//! | `escape generic` | 490.00 µs | 1.60× |
97//! | `serde_json` | 570.35 µs | 1.86× |
98//! | `escape v_jsonescape` | 599.72 µs | 1.95× |
99//! | `json-escape` | 644.73 µs | 2.10× |
100//!
101//! **Fixtures payload (~300 iterations)**
102//!
103//! | Implementation | Median time | vs fastest |
104//! | --------------------- | ------------ | ---------- |
105//! | **`escape generic`** | **17.89 ms** | **1.00×** |
106//! | **`escape simd`** | **17.92 ms** | **1.00×** |
107//! | `serde_json` | 19.78 ms | 1.11× |
108//! | `escape v_jsonescape` | 21.09 ms | 1.18× |
109//! | `json-escape` | 22.43 ms | 1.25× |
110
111#[cfg(target_arch = "aarch64")]
112mod aarch64;
113mod generic;
114#[cfg(target_arch = "x86_64")]
115mod x86;
116
117pub use generic::escape_generic;
118
119/// Main entry point for JSON string escaping with SIMD acceleration
120/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
121pub fn escape<S: AsRef<str>>(input: S) -> String {
122 #[cfg(target_arch = "x86_64")]
123 {
124 // Runtime CPU feature detection for x86_64
125 if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512bw") {
126 unsafe { return x86::escape_avx512(input) }
127 } else if is_x86_feature_detected!("avx2") {
128 unsafe { return x86::escape_avx2(input) }
129 } else if is_x86_feature_detected!("sse2") {
130 unsafe { return x86::escape_sse2(input) }
131 } else {
132 return escape_generic(input);
133 }
134 }
135
136 #[cfg(target_arch = "aarch64")]
137 {
138 #[cfg(feature = "force_aarch64_neon")]
139 {
140 return aarch64::escape_neon(input);
141 }
142 #[cfg(not(feature = "force_aarch64_neon"))]
143 {
144 // on Apple M2 and later, the `bf16` feature is available
145 // it means they have more registers and can significantly benefit from the SIMD path
146 // TODO: add support for sve2 chips with wider registers
147 // github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
148 if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
149 return aarch64::escape_neon(input);
150 } else {
151 return escape_generic(input);
152 }
153 }
154 }
155
156 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
157 escape_generic(input)
158}
159
#[test]
fn test_escape_ascii_json_string() {
    // Plain ASCII with nothing to escape: the output must agree with serde_json.
    let plain = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
    let actual = escape(plain);
    let expected = serde_json::to_string(plain).unwrap();
    assert_eq!(actual, expected);
}
165
#[test]
fn test_escape_json_string() {
    // Start with every control character U+0000..=U+001F in ascending order.
    let mut fixture: String = (0u8..=0x1F).map(char::from).collect();

    // Then the characters that have dedicated short escapes, plus quote and backslash.
    let specials = ['\t', '\x08', '\x09', '\x0A', '\x0C', '\x0D', '\x22', '\x5C'];
    fixture.extend(specials.iter().copied());

    // Finish with ordinary ASCII and multi-byte UTF-8 text.
    fixture.push_str("normal string");
    fixture.push('😊');
    fixture.push_str("中文 English 🚀 \n❓ 𝄞");

    // The result of this first call is discarded; only the second call is compared.
    let _ = escape(fixture.as_str());

    let actual = escape(fixture.as_str());
    let expected = serde_json::to_string(fixture.as_str()).unwrap();
    assert_eq!(actual, expected, "fixture: {:?}", fixture);
}
191
192// Test cases for various string sizes to cover different SIMD paths
193
#[test]
fn test_empty_string() {
    // An empty input still produces the two surrounding quote characters.
    let escaped = escape("");
    assert_eq!(escaped, "\"\"");
}
198
#[test]
fn test_very_small_strings() {
    // (input, expected output) pairs; every input is shorter than 16 bytes (SSE register size).
    let cases = [
        ("a", r#""a""#),
        ("ab", r#""ab""#),
        ("hello", r#""hello""#),
        ("hello\n", r#""hello\n""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
        ("\t", r#""\t""#),
        ("\r\n", r#""\r\n""#),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(escape(input), expected);
    }
}
211
#[test]
fn test_small_strings_16_bytes() {
    // (input, byte length) pairs. The first fills an SSE register exactly (16 bytes);
    // the second carries a tab escape and is 15 bytes long because `\t` is 1 byte.
    let cases = [("0123456789abcdef", 16), ("01234567\t9abcde", 15)];
    for &(text, byte_len) in cases.iter() {
        assert_eq!(text.len(), byte_len);
        assert_eq!(escape(text), serde_json::to_string(text).unwrap());
    }
}
224
#[test]
fn test_medium_strings_32_bytes() {
    // Shared check: the output must agree with serde_json.
    let check = |text: &str| assert_eq!(escape(text), serde_json::to_string(text).unwrap());

    // Exactly 32 bytes - AVX2 register boundary.
    let exact = "0123456789abcdef0123456789abcdef";
    assert_eq!(exact.len(), 32);
    check(exact);

    // Same shape with a double quote in the middle.
    let with_quote = "0123456789abcde\"0123456789abcde";
    check(with_quote);
}
236
#[test]
fn test_large_strings_128_bytes() {
    // Exactly 128 bytes of plain ASCII - main loop size.
    let plain = "0123456789abcdef".repeat(8);
    assert_eq!(plain.len(), 128);
    assert_eq!(escape(&plain), serde_json::to_string(&plain).unwrap());

    // Eight chunks with an escape at the end of each: newline for even chunks, quote for odd.
    let with_escapes: String = (0..8)
        .map(|chunk| {
            if chunk % 2 == 0 {
                "0123456789abcd\n"
            } else {
                "0123456789abcd\""
            }
        })
        .collect();
    assert_eq!(
        escape(&with_escapes),
        serde_json::to_string(&with_escapes).unwrap()
    );
}
255
#[test]
fn test_unaligned_data() {
    // Prefix the payload with 0..32 spaces and escape the slice that starts after the padding,
    // so the input begins at varying offsets inside its buffer.
    let payload = "test\nstring\"with\\escapes";
    for offset in 0..32 {
        let padded = format!("{}{}", " ".repeat(offset), payload);
        let unaligned = &padded[offset..];
        assert_eq!(
            escape(unaligned),
            serde_json::to_string(unaligned).unwrap(),
            "Failed at offset {}",
            offset
        );
    }
}
267
#[test]
fn test_sparse_escapes() {
    // A long run of plain bytes with an escape only at the very start (quote)
    // and at the very end (backslash).
    let sparse = ["\"", "a".repeat(500).as_str(), "\\"].concat();
    let actual = escape(&sparse);
    let expected = serde_json::to_string(&sparse).unwrap();
    assert_eq!(actual, expected);
}
277
#[test]
fn test_dense_escapes() {
    // Nothing but quotes and backslashes, repeated.
    let quotes_and_backslashes = "\"\\\"\\\"\\\"\\".repeat(50);
    assert_eq!(
        escape(&quotes_and_backslashes),
        serde_json::to_string(&quotes_and_backslashes).unwrap()
    );

    // All 32 control characters in ascending order, ten times over.
    let ctrl: String = (0..10)
        .flat_map(|_| 0u8..32)
        .map(char::from)
        .collect();
    assert_eq!(escape(&ctrl), serde_json::to_string(&ctrl).unwrap());
}
293
#[test]
fn test_boundary_conditions() {
    // Lengths on both sides of the 256-byte boundary (common cache line multiple).
    for size in 250..260 {
        let plain = "a".repeat(size);
        assert_eq!(escape(&plain), serde_json::to_string(&plain).unwrap());

        // Same total length, but the final byte is a quote that needs escaping.
        let quoted_tail: String = "a".repeat(size - 1) + "\"";
        assert_eq!(
            escape(&quoted_tail),
            serde_json::to_string(&quoted_tail).unwrap()
        );
    }
}
307
#[test]
fn test_all_escape_types() {
    // Each escape form on its own: (input, exact expected output).
    let cases = [
        ("\x00", r#""\u0000""#),
        ("\x08", r#""\b""#),
        ("\x09", r#""\t""#),
        ("\x0A", r#""\n""#),
        ("\x0C", r#""\f""#),
        ("\x0D", r#""\r""#),
        ("\x1F", r#""\u001f""#),
        ("\"", r#""\"""#),
        ("\\", r#""\\""#),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(escape(input), expected);
    }

    // Every control character as a one-character string, checked against serde_json.
    for byte in 0u8..32 {
        let single = char::from(byte).to_string();
        assert_eq!(
            escape(&single),
            serde_json::to_string(&single).unwrap(),
            "Failed for byte 0x{:02x}",
            byte
        );
    }
}
329
#[test]
fn test_mixed_content() {
    // One raw literal mixing ASCII, quotes, line breaks and multi-byte UTF-8.
    // It is a raw string, so the backslash sequences on its last line are literal characters.
    let sample = r#"Hello "World"!
 Tab: Here
 Emoji: 😀 Chinese: 中文
 Math: ∑∫∂ Music: 𝄞
 Escape: \" \\ \n \r \t"#;
    let actual = escape(sample);
    let expected = serde_json::to_string(sample).unwrap();
    assert_eq!(actual, expected);
}
340
#[test]
fn test_repeated_patterns() {
    // Short units repeated 100 times each: plain text, text interleaved with quotes,
    // and nothing but control characters. Such patterns might benefit from or confuse
    // SIMD operations.
    let units = ["abcd", "a\"b\"", "\t\n"];
    for unit in units.iter() {
        let repeated = unit.repeat(100);
        assert_eq!(escape(&repeated), serde_json::to_string(&repeated).unwrap());
    }
}
353
#[test]
fn test_rxjs() {
    // Read every TypeScript source under the rxjs package into memory first.
    let sources: Vec<String> = glob::glob("node_modules/rxjs/src/**/*.ts")
        .unwrap()
        .map(|entry| std::fs::read_to_string(entry.unwrap()).unwrap())
        .collect();

    // Guard against a missing checkout, which would make the loop below pass vacuously.
    assert!(!sources.is_empty());

    sources
        .iter()
        .for_each(|source| assert_eq!(escape(source), serde_json::to_string(source).unwrap()));
}
366
#[test]
fn test_sources() {
    // One glob per extension, all created up front and walked in this order.
    let patterns = [
        "fixtures/**/*.ts",
        "fixtures/**/*.tsx",
        "fixtures/**/*.js",
        "fixtures/**/*.mjs",
        "fixtures/**/*.cjs",
    ];
    let walkers: Vec<_> = patterns
        .iter()
        .map(|pattern| glob::glob(pattern).unwrap())
        .collect();

    // Keep regular files only; other matches (e.g. directories) are skipped.
    let sources: Vec<String> = walkers
        .into_iter()
        .flatten()
        .map(|entry| entry.unwrap())
        .filter(|path| std::fs::metadata(path).unwrap().is_file())
        .map(|path| std::fs::read_to_string(&path).unwrap())
        .collect();

    // Guard against missing fixtures, which would make the loop below pass vacuously.
    assert!(!sources.is_empty());

    sources
        .iter()
        .for_each(|source| assert_eq!(escape(source), serde_json::to_string(source).unwrap()));
}