1#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
6use std::arch::is_x86_feature_detected;
7
8mod simd;
9
/// Escape lookup table indexed by byte value.
///
/// Each entry is `(len, bytes)`: `bytes` holds the escape sequence for that
/// byte, zero-padded to eight bytes, and `len` is the length of the sequence.
/// An entry of `(0, [0; 8])` means the table holds no escape for that byte.
///
/// Contents:
/// * `0x00..=0x1F` map to the six-byte `\u00XX` form (lower-case hex), except
///   for the bytes that have a two-byte short form: `\b`, `\t`, `\n`, `\f`, `\r`.
/// * `"` (0x22) maps to `\"` and `\` (0x5C) maps to `\\`.
/// * Every other byte has an empty entry.
pub(crate) const QUOTE_TAB: [(u8, [u8; 8]); 256] = {
    const HEX: [u8; 16] = *b"0123456789abcdef";

    let mut table = [(0u8, [0u8; 8]); 256];

    // Default for every control byte: `\u00XX` followed by two padding zeros.
    let mut byte = 0usize;
    while byte < 0x20 {
        table[byte] = (
            6,
            [b'\\', b'u', b'0', b'0', HEX[byte >> 4], HEX[byte & 0x0F], 0, 0],
        );
        byte += 1;
    }

    // Bytes with a two-character short escape replace the default above.
    table[0x08] = (2, *b"\\b\0\0\0\0\0\0");
    table[0x09] = (2, *b"\\t\0\0\0\0\0\0");
    table[0x0A] = (2, *b"\\n\0\0\0\0\0\0");
    table[0x0C] = (2, *b"\\f\0\0\0\0\0\0");
    table[0x0D] = (2, *b"\\r\0\0\0\0\0\0");
    table[0x22] = (2, *b"\\\"\0\0\0\0\0\0");
    table[0x5C] = (2, *b"\\\\\0\0\0\0\0\0");

    table
};
274
/// Per-byte flag table: `1` for bytes that must be escaped, `0` otherwise.
///
/// The flagged bytes are the control range `0x00..=0x1F`, the double quote
/// (0x22) and the backslash (0x5C).
pub(crate) const NEED_ESCAPED: [u8; 256] = {
    let mut flags = [0u8; 256];

    // Control characters.
    let mut byte = 0usize;
    while byte < 0x20 {
        flags[byte] = 1;
        byte += 1;
    }

    flags[b'"' as usize] = 1;
    flags[b'\\' as usize] = 1;

    flags
};
285
286#[inline(always)]
287fn format_string(value: &str, dst: &mut [u8]) -> usize {
288 #[cfg(target_arch = "aarch64")]
289 {
290 let has_neon = cfg!(target_os = "macos") || std::arch::is_aarch64_feature_detected!("neon");
291 if has_neon {
292 unsafe { simd::neon::format_string(value, dst) }
293 } else {
294 simd::v128::format_string(value, dst)
295 }
296 }
297
298 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
299 {
300 if is_x86_feature_detected!("avx512f") {
301 unsafe { simd::avx512::format_string(value, dst) }
302 } else if is_x86_feature_detected!("avx2") {
303 unsafe { simd::avx2::format_string(value, dst) }
304 } else if is_x86_feature_detected!("sse2") {
305 unsafe { simd::sse2::format_string(value, dst) }
306 } else {
307 simd::v128::format_string(value, dst)
308 }
309 }
310
311 #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
312 {
313 simd::v128::format_string(value, dst)
314 }
315}
316
317pub fn escape(value: &str) -> String {
318 let capacity = value.len() * 6 + 32 + 3;
319 let mut buf = Vec::with_capacity(capacity);
320 #[allow(clippy::uninit_vec)]
321 unsafe {
322 buf.set_len(capacity)
323 };
324 let cnt = format_string(value, &mut buf);
325 unsafe { buf.set_len(cnt) };
326 unsafe { String::from_utf8_unchecked(buf) }
327}
328
329pub fn escape_into<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) {
333 let value = value.as_ref();
334 let old_len = dst.len();
335
336 unsafe {
339 let spare =
341 std::slice::from_raw_parts_mut(dst.as_mut_ptr().add(old_len), dst.capacity() - old_len);
342 let cnt = format_string(value, spare);
343 dst.set_len(old_len + cnt);
344 }
345}
346
#[cfg(test)]
mod tests {
    use std::fs::read_dir;
    use std::path::{Path, PathBuf};

    use rand::seq::SliceRandom;

    use super::*;

    // `serde_json`'s encoding of `input`, used as the oracle throughout.
    fn serde_reference(input: &str) -> String {
        serde_json::to_string(input).unwrap()
    }

    // Checks both `escape` and `escape_into` against `serde_json` for one
    // source text. The destination is pre-sized with the same bound `escape`
    // uses.
    fn check_source(source: &str) {
        assert_eq!(escape(source), serde_json::to_string(source).unwrap());
        let mut output = String::with_capacity(source.len() * 6 + 32 + 3);
        escape_into(source, unsafe { output.as_mut_vec() });
        assert_eq!(output, serde_json::to_string(source).unwrap());
    }

    #[test]
    fn test_escape_ascii_json_string() {
        let input = r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#;
        assert_eq!(escape(input), serde_reference(input));
    }

    #[test]
    fn test_escape_json_string() {
        // Every control character, then the short-escape bytes again, then
        // plain and non-ASCII text.
        let mut fixture: String = (0u8..=0x1F).map(char::from).collect();
        fixture.extend(['\t', '\x08', '\x09', '\x0A', '\x0C', '\x0D', '\x22', '\x5C']);
        fixture += "normal string";
        fixture.push('π');
        fixture += "δΈζ English π \nβ π";

        let _ = escape(&fixture);
        assert_eq!(
            escape(&fixture),
            serde_reference(&fixture),
            "fixture: {:?}",
            fixture
        );
    }

    #[test]
    fn test_empty_string() {
        assert_eq!(escape(""), r#""""#);
    }

    #[test]
    fn test_very_small_strings() {
        let cases = [
            ("a", r#""a""#),
            ("ab", r#""ab""#),
            ("hello", r#""hello""#),
            ("hello\n", r#""hello\n""#),
            ("\"", r#""\"""#),
            ("\\", r#""\\""#),
            ("\t", r#""\t""#),
            ("\r\n", r#""\r\n""#),
        ];
        for (input, expected) in cases {
            assert_eq!(escape(input), expected);
        }
    }

    #[test]
    fn test_small_strings_16_bytes() {
        let plain = "0123456789abcdef";
        assert_eq!(plain.len(), 16);
        assert_eq!(escape(plain), serde_reference(plain));

        let with_tab = "01234567\t9abcde";
        assert_eq!(with_tab.len(), 15);
        assert_eq!(escape(with_tab), serde_reference(with_tab));
    }

    #[test]
    fn test_medium_strings_32_bytes() {
        let plain = "0123456789abcdef0123456789abcdef";
        assert_eq!(plain.len(), 32);
        assert_eq!(escape(plain), serde_reference(plain));

        let with_quote = "0123456789abcde\"0123456789abcde";
        assert_eq!(escape(with_quote), serde_reference(with_quote));
    }

    #[test]
    fn test_large_strings_128_bytes() {
        let plain = "0123456789abcdef".repeat(8);
        assert_eq!(plain.len(), 128);
        assert_eq!(escape(&plain), serde_reference(&plain));

        // Alternate a newline-terminated and a quote-terminated segment.
        let with_escapes: String = (0..8)
            .map(|i| {
                if i % 2 == 0 {
                    "0123456789abcd\n"
                } else {
                    "0123456789abcd\""
                }
            })
            .collect();
        assert_eq!(escape(&with_escapes), serde_reference(&with_escapes));
    }

    #[test]
    fn test_unaligned_data() {
        const PAYLOAD: &str = "test\nstring\"with\\escapes";
        for offset in 0..32 {
            // Prefix the payload with `offset` spaces and escape only the tail,
            // so the input slice starts `offset` bytes into its allocation.
            let mut padded = " ".repeat(offset);
            padded.push_str(PAYLOAD);
            let tail = &padded[offset..];

            let result = escape(tail);
            let expected = serde_reference(tail);
            assert_eq!(result, expected, "Failed at offset {}", offset);
        }
    }

    #[test]
    fn test_sparse_escapes() {
        // One escape at each end of a long run of plain bytes.
        let sparse = format!("\"{}\\", "a".repeat(500));
        assert_eq!(escape(&sparse), serde_reference(&sparse));
    }

    #[test]
    fn test_dense_escapes() {
        let quotes_and_slashes = "\"\\\"\\\"\\\"\\".repeat(50);
        assert_eq!(
            escape(&quotes_and_slashes),
            serde_reference(&quotes_and_slashes)
        );

        let control_runs: String = (0..10)
            .flat_map(|_| (0u8..32).map(char::from))
            .collect();
        assert_eq!(escape(&control_runs), serde_reference(&control_runs));
    }

    #[test]
    fn test_boundary_conditions() {
        for size in 250..260 {
            let plain = "a".repeat(size);
            assert_eq!(escape(&plain), serde_reference(&plain));

            let mut quote_at_end = "a".repeat(size - 1);
            quote_at_end.push('"');
            assert_eq!(escape(&quote_at_end), serde_reference(&quote_at_end));
        }
    }

    #[test]
    fn test_all_escape_types() {
        let fixed_cases = [
            ("\x00", r#""\u0000""#),
            ("\x08", r#""\b""#),
            ("\x09", r#""\t""#),
            ("\x0A", r#""\n""#),
            ("\x0C", r#""\f""#),
            ("\x0D", r#""\r""#),
            ("\x1F", r#""\u001f""#),
            ("\"", r#""\"""#),
            ("\\", r#""\\""#),
        ];
        for (input, expected) in fixed_cases {
            assert_eq!(escape(input), expected);
        }

        // Every control byte must come out as its `QUOTE_TAB` sequence, with
        // the zero padding stripped, wrapped in quotes.
        for byte in 0u8..32 {
            let input = char::from(byte).to_string();
            let (_, padded) = QUOTE_TAB[usize::from(byte)];
            let sequence = std::str::from_utf8(&padded)
                .unwrap()
                .trim_end_matches('\0');
            assert_eq!(
                escape(&input),
                format!("\"{}\"", sequence),
                "Failed for byte 0x{:02x}",
                byte
            );
        }
    }

    #[test]
    fn test_mixed_content() {
        let mixed = r#"Hello "World"!
Tab: Here
Emoji: π Chinese: δΈζ
Math: ββ«β Music: π
Escape: \" \\ \n \r \t"#;
        assert_eq!(escape(mixed), serde_reference(mixed));
    }

    #[test]
    fn test_repeated_patterns() {
        for unit in ["abcd", "a\"b\"", "\t\n"] {
            let repeated = unit.repeat(100);
            assert_eq!(escape(&repeated), serde_reference(&repeated));
        }
    }

    #[test]
    fn test_rxjs() {
        let mut sources = Vec::new();
        read_dir_recursive("node_modules/rxjs/src", &mut sources, |p| {
            matches!(p.extension().and_then(|e| e.to_str()), Some("ts"))
        })
        .unwrap();
        assert!(!sources.is_empty());
        sources.shuffle(&mut rand::rng());

        // Under Miri only a handful of files are checked.
        let limit = if cfg!(miri) { 10 } else { sources.len() };
        for source in sources.iter().take(limit) {
            check_source(source);
        }
    }

    #[test]
    fn test_sources() {
        for source in load_affine_sources().unwrap() {
            check_source(&source);
        }
    }

    // Loads every TS/JS-family file under `fixtures`, shuffled; under Miri at
    // most ten are yielded.
    fn load_affine_sources() -> Result<impl Iterator<Item = String>, std::io::Error> {
        let mut sources = Vec::new();
        read_dir_recursive("fixtures", &mut sources, |p| {
            matches!(
                p.extension().and_then(|e| e.to_str()),
                Some("ts" | "tsx" | "js" | "mjs" | "cjs")
            )
        })?;
        assert!(!sources.is_empty());
        sources.shuffle(&mut rand::rng());
        let limit = if cfg!(miri) { 10 } else { sources.len() };
        Ok(sources.into_iter().take(limit))
    }

    // Walks `dir` recursively and appends the contents of every regular file
    // accepted by `f` to `sources`. The first I/O error aborts the walk.
    fn read_dir_recursive<P: AsRef<Path>, F: Fn(PathBuf) -> bool + Copy>(
        dir: P,
        sources: &mut Vec<String>,
        f: F,
    ) -> Result<(), std::io::Error> {
        for entry in read_dir(dir)? {
            let path = entry?.path();
            let metadata = std::fs::metadata(&path)?;
            if metadata.is_dir() {
                read_dir_recursive(&path, sources, f)?;
            } else if metadata.is_file() && f(path.clone()) {
                sources.push(std::fs::read_to_string(&path)?);
            }
        }
        Ok(())
    }
}