1#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
6use std::arch::is_x86_feature_detected;
7
8mod simd;
9
/// Per-byte JSON escape table, indexed by the byte value.
///
/// Each entry is `(len, bytes)`: `bytes` holds the escape sequence for that
/// byte, padded with NULs to 8 bytes, and `len` is the number of meaningful
/// leading bytes. Bytes that have no escape sequence map to `(0, [0; 8])`.
///
/// Only the control range `0x00..=0x1f`, `"` and `\` have non-empty entries.
/// Control bytes use `\u00XX` with lowercase hex digits (6 bytes), except for
/// the short forms `\b`, `\t`, `\n`, `\f` and `\r` (2 bytes).
///
/// The table is assembled at compile time, so it costs nothing at runtime.
pub(crate) const QUOTE_TAB: [(u8, [u8; 8]); 256] = {
    let hex_digits = *b"0123456789abcdef";

    // Start with "no escape" everywhere, then fill in the exceptions.
    let mut table = [(0u8, [0u8; 8]); 256];

    // Generic form for every control byte: `\u00XX`.
    let mut byte = 0usize;
    while byte < 0x20 {
        table[byte] = (
            6,
            [
                b'\\',
                b'u',
                b'0',
                b'0',
                hex_digits[byte >> 4],
                hex_digits[byte & 0x0f],
                0,
                0,
            ],
        );
        byte += 1;
    }

    // Control bytes that have a dedicated two-character escape.
    table[0x08] = (2, *b"\\b\0\0\0\0\0\0");
    table[0x09] = (2, *b"\\t\0\0\0\0\0\0");
    table[0x0a] = (2, *b"\\n\0\0\0\0\0\0");
    table[0x0c] = (2, *b"\\f\0\0\0\0\0\0");
    table[0x0d] = (2, *b"\\r\0\0\0\0\0\0");

    // The two printable bytes that must be escaped.
    table[b'"' as usize] = (2, *b"\\\"\0\0\0\0\0\0");
    table[b'\\' as usize] = (2, *b"\\\\\0\0\0\0\0\0");

    table
};
274
/// Per-byte flag table, indexed by the byte value: `1` when the byte must be
/// escaped in a JSON string, `0` otherwise.
///
/// The flagged bytes are the control range `0x00..=0x1f`, `"` (0x22) and
/// `\` (0x5c). The table is assembled at compile time.
pub(crate) const NEED_ESCAPED: [u8; 256] = {
    let mut flags = [0u8; 256];

    // Every control byte needs escaping.
    let mut ctrl = 0usize;
    while ctrl < 0x20 {
        flags[ctrl] = 1;
        ctrl += 1;
    }

    flags[b'"' as usize] = 1;
    flags[b'\\' as usize] = 1;

    flags
};
285
286#[inline(always)]
287fn format_string(value: &str, dst: &mut [u8]) -> usize {
288 #[cfg(target_arch = "aarch64")]
289 {
290 let has_neon = cfg!(target_os = "macos") || std::arch::is_aarch64_feature_detected!("neon");
291 if has_neon {
292 unsafe { simd::neon::format_string(value, dst) }
293 } else {
294 simd::v128::format_string(value, dst)
295 }
296 }
297
298 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
299 {
300 if is_x86_feature_detected!("avx512f") {
301 unsafe { simd::avx512::format_string(value, dst) }
302 } else if is_x86_feature_detected!("avx2") {
303 unsafe { simd::avx2::format_string(value, dst) }
304 } else if is_x86_feature_detected!("sse2") {
305 unsafe { simd::sse2::format_string(value, dst) }
306 } else {
307 simd::v128::format_string(value, dst)
308 }
309 }
310
311 #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
312 {
313 simd::v128::format_string(value, dst)
314 }
315}
316
317pub fn escape(value: &str) -> String {
318 let capacity = value.len() * 6 + 32 + 3;
319 let mut buf = Vec::with_capacity(capacity);
320 #[allow(clippy::uninit_vec)]
321 unsafe {
322 buf.set_len(capacity)
323 };
324 let cnt = format_string(value, &mut buf);
325 unsafe { buf.set_len(cnt) };
326 unsafe { String::from_utf8_unchecked(buf) }
327}
328
329pub fn escape_into<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) -> usize {
330 let value = value.as_ref();
331 let needed_capacity = value.len() * 6 + 32 + 3;
332
333 dst.reserve(needed_capacity);
335
336 let old_len = dst.len();
337
338 unsafe {
341 let spare =
343 std::slice::from_raw_parts_mut(dst.as_mut_ptr().add(old_len), dst.capacity() - old_len);
344 let cnt = format_string(value, spare);
345 dst.set_len(old_len + cnt);
346 cnt
347 }
348}
349
#[cfg(test)]
mod tests {
    use std::fs::read_dir;
    use std::path::{Path, PathBuf};

    use rand::seq::SliceRandom;

    use super::*;

    /// Asserts that `escape` produces exactly what `serde_json::to_string`
    /// produces for `input`. Failures are reported at the caller's location.
    #[track_caller]
    fn assert_same_as_serde(input: &str) {
        assert_eq!(escape(input), serde_json::to_string(input).unwrap());
    }

    /// Plain ASCII without anything that needs escaping.
    #[test]
    fn test_escape_ascii_json_string() {
        assert_same_as_serde(r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#);
    }

    /// Every control byte, every short escape, and some non-ASCII text.
    #[test]
    fn test_escape_json_string() {
        let mut fixture: String = (0u8..=0x1F).map(char::from).collect();
        fixture.extend([
            '\t', '\x08', '\x09', '\x0A', '\x0C', '\x0D', '\x22', '\x5C',
        ]);
        fixture.push_str("normal string");
        fixture.push('π');
        fixture.push_str("δΈζ English π \nβ π");

        // The original test also calls `escape` once and discards the result.
        let _ = escape(&fixture);

        let expected = serde_json::to_string(fixture.as_str()).unwrap();
        assert_eq!(escape(&fixture), expected, "fixture: {:?}", fixture);
    }

    #[test]
    fn test_empty_string() {
        assert_eq!(escape(""), r#""""#);
    }

    /// Inputs far shorter than any vector width.
    #[test]
    fn test_very_small_strings() {
        let cases = [
            ("a", r#""a""#),
            ("ab", r#""ab""#),
            ("hello", r#""hello""#),
            ("hello\n", r#""hello\n""#),
            ("\"", r#""\"""#),
            ("\\", r#""\\""#),
            ("\t", r#""\t""#),
            ("\r\n", r#""\r\n""#),
        ];
        for (input, expected) in cases {
            assert_eq!(escape(input), expected);
        }
    }

    #[test]
    fn test_small_strings_16_bytes() {
        let plain = "0123456789abcdef";
        assert_eq!(plain.len(), 16);
        assert_same_as_serde(plain);

        // The tab is a single input byte, so this fixture is 15 bytes long.
        let with_escape = "01234567\t9abcde";
        assert_eq!(with_escape.len(), 15);
        assert_same_as_serde(with_escape);
    }

    #[test]
    fn test_medium_strings_32_bytes() {
        let plain = "0123456789abcdef0123456789abcdef";
        assert_eq!(plain.len(), 32);
        assert_same_as_serde(plain);

        assert_same_as_serde("0123456789abcde\"0123456789abcde");
    }

    #[test]
    fn test_large_strings_128_bytes() {
        let plain = "0123456789abcdef".repeat(8);
        assert_eq!(plain.len(), 128);
        assert_same_as_serde(&plain);

        // Alternate chunks ending in a newline and chunks ending in a quote.
        let with_escapes: String = (0..8)
            .map(|i| {
                if i % 2 == 0 {
                    "0123456789abcd\n"
                } else {
                    "0123456789abcd\""
                }
            })
            .collect();
        assert_same_as_serde(&with_escapes);
    }

    /// Slices the same payload at different offsets into its backing buffer.
    #[test]
    fn test_unaligned_data() {
        for offset in 0..32 {
            let padded = format!("{}{}", " ".repeat(offset), "test\nstring\"with\\escapes");
            let tail = &padded[offset..];
            assert_eq!(
                escape(tail),
                serde_json::to_string(tail).unwrap(),
                "Failed at offset {}",
                offset
            );
        }
    }

    /// A long clean run with an escape only at each end.
    #[test]
    fn test_sparse_escapes() {
        let input = format!("\"{}\\", "a".repeat(500));
        assert_same_as_serde(&input);
    }

    /// Inputs in which every byte needs escaping.
    #[test]
    fn test_dense_escapes() {
        let quotes_and_slashes = "\"\\\"\\\"\\\"\\".repeat(50);
        assert_same_as_serde(&quotes_and_slashes);

        let control_bytes: String = (0..10)
            .flat_map(|_| (0u8..32).map(char::from))
            .collect();
        assert_same_as_serde(&control_bytes);
    }

    /// Lengths on either side of 256.
    #[test]
    fn test_boundary_conditions() {
        for size in 250..260 {
            assert_same_as_serde(&"a".repeat(size));

            let trailing_quote = format!("{}\"", "a".repeat(size - 1));
            assert_same_as_serde(&trailing_quote);
        }
    }

    #[test]
    fn test_all_escape_types() {
        let cases = [
            ("\x00", r#""\u0000""#),
            ("\x08", r#""\b""#),
            ("\x09", r#""\t""#),
            ("\x0A", r#""\n""#),
            ("\x0C", r#""\f""#),
            ("\x0D", r#""\r""#),
            ("\x1F", r#""\u001f""#),
            ("\"", r#""\"""#),
            ("\\", r#""\\""#),
        ];
        for (input, expected) in cases {
            assert_eq!(escape(input), expected);
        }

        // Each control byte on its own must match its `QUOTE_TAB` entry once
        // the NUL padding is stripped.
        for byte in 0u8..32 {
            let input = char::from(byte).to_string();
            let (_, padded) = QUOTE_TAB[usize::from(byte)];
            let sequence = std::str::from_utf8(&padded)
                .unwrap()
                .trim_end_matches('\0');
            assert_eq!(
                escape(&input),
                format!("\"{}\"", sequence),
                "Failed for byte 0x{:02x}",
                byte
            );
        }
    }

    #[test]
    fn test_mixed_content() {
        let mixed = r#"Hello "World"!
 Tab: Here
 Emoji: π Chinese: δΈζ
 Math: ββ«β Music: π
 Escape: \" \\ \n \r \t"#;
        assert_same_as_serde(mixed);
    }

    #[test]
    fn test_repeated_patterns() {
        for unit in ["abcd", "a\"b\"", "\t\n"] {
            assert_same_as_serde(&unit.repeat(100));
        }
    }

    /// Runs both entry points over the TypeScript sources of a local `rxjs`
    /// checkout (a small random sample under Miri).
    #[test]
    fn test_rxjs() {
        let mut sources = Vec::new();
        read_dir_recursive("node_modules/rxjs/src", &mut sources, |p| {
            p.extension().and_then(|e| e.to_str()) == Some("ts")
        })
        .unwrap();
        assert!(!sources.is_empty());
        sources.shuffle(&mut rand::rng());

        let limit = if cfg!(miri) { 10 } else { sources.len() };
        for source in sources.iter().take(limit) {
            let expected = serde_json::to_string(&source).unwrap();
            assert_eq!(escape(source), expected);

            let mut output = String::new();
            escape_into(source, unsafe { output.as_mut_vec() });
            assert_eq!(output, serde_json::to_string(&source).unwrap());
        }
    }

    /// Runs both entry points over the script sources under `fixtures`.
    #[test]
    fn test_sources() {
        for source in load_affine_sources().unwrap() {
            let expected = serde_json::to_string(&source).unwrap();
            assert_eq!(escape(&source), expected);

            let mut output = String::with_capacity(source.len() * 6 + 32 + 3);
            escape_into(&source, unsafe { output.as_mut_vec() });
            assert_eq!(output, serde_json::to_string(&source).unwrap());
        }
    }

    /// Reads every `.ts`/`.tsx`/`.js`/`.mjs`/`.cjs` file under `fixtures`,
    /// shuffled; under Miri only the first ten are yielded.
    fn load_affine_sources() -> Result<impl Iterator<Item = String>, std::io::Error> {
        let mut sources = Vec::new();
        read_dir_recursive("fixtures", &mut sources, |p| {
            matches!(
                p.extension().and_then(|e| e.to_str()),
                Some("ts" | "tsx" | "js" | "mjs" | "cjs")
            )
        })?;
        assert!(!sources.is_empty());

        let limit = if cfg!(miri) { 10 } else { sources.len() };
        sources.shuffle(&mut rand::rng());
        Ok(sources.into_iter().take(limit))
    }

    /// Walks `dir` recursively and appends the contents of every file whose
    /// path satisfies `f` to `sources`.
    fn read_dir_recursive<P: AsRef<Path>, F: Fn(PathBuf) -> bool + Copy>(
        dir: P,
        sources: &mut Vec<String>,
        f: F,
    ) -> Result<(), std::io::Error> {
        for entry in read_dir(dir)? {
            let path = entry?.path();
            let metadata = std::fs::metadata(&path)?;
            if metadata.is_file() && f(path.clone()) {
                sources.push(std::fs::read_to_string(&path)?);
            }
            if metadata.is_dir() {
                read_dir_recursive(&path, sources, f)?;
            }
        }
        Ok(())
    }
}