1#![allow(clippy::incompatible_msrv)]
6
7#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
8use std::arch::is_x86_feature_detected;
9
10mod simd;
11
/// Escape lookup table indexed by input byte.
///
/// Each entry is `(len, bytes)`: `bytes` holds the escape sequence for that
/// byte, NUL-padded to eight bytes, and `len` is the number of meaningful
/// leading bytes. Bytes that have no escape sequence map to `(0, [0; 8])`.
///
/// Populated entries:
/// * `0x00..=0x1f` use the six-byte `\u00XX` form (lowercase hex), except
///   `0x08`, `0x09`, `0x0a`, `0x0c` and `0x0d`, which use the two-byte forms
///   `\b`, `\t`, `\n`, `\f` and `\r`.
/// * `"` (0x22) and `\` (0x5c) use the two-byte forms `\"` and `\\`.
pub(crate) const QUOTE_TAB: [(u8, [u8; 8]); 256] = {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut table = [(0u8, [0u8; 8]); 256];

    // Start every control byte off with the generic `\u00XX` form.
    let mut byte = 0usize;
    while byte < 0x20 {
        table[byte] = (
            6,
            [
                b'\\',
                b'u',
                b'0',
                b'0',
                HEX_DIGITS[byte >> 4],
                HEX_DIGITS[byte & 0x0f],
                0,
                0,
            ],
        );
        byte += 1;
    }

    // Bytes with a dedicated two-byte escape replace the generic form.
    let short_escapes: [(u8, u8); 7] = [
        (0x08, b'b'),
        (0x09, b't'),
        (0x0a, b'n'),
        (0x0c, b'f'),
        (0x0d, b'r'),
        (b'"', b'"'),
        (b'\\', b'\\'),
    ];
    let mut idx = 0usize;
    while idx < short_escapes.len() {
        let (raw, letter) = short_escapes[idx];
        table[raw as usize] = (2, [b'\\', letter, 0, 0, 0, 0, 0, 0]);
        idx += 1;
    }

    table
};
276
/// Per-byte flag table: `1` for every byte that has an entry in the escape
/// table (`0x00..=0x1f`, `"` and `\`), `0` for all other byte values.
pub(crate) const NEED_ESCAPED: [u8; 256] = {
    let mut flags = [0u8; 256];

    // All control bytes are flagged.
    let mut byte = 0usize;
    while byte < 0x20 {
        flags[byte] = 1;
        byte += 1;
    }

    // So are the two printable bytes with an escape sequence.
    flags[b'"' as usize] = 1;
    flags[b'\\' as usize] = 1;

    flags
};
287
288#[inline(always)]
289fn format_string(value: &str, dst: &mut [u8]) -> usize {
290 #[cfg(target_arch = "aarch64")]
291 {
292 let has_neon = cfg!(target_os = "macos") || std::arch::is_aarch64_feature_detected!("neon");
293 if has_neon {
294 unsafe { simd::neon::format_string(value, dst) }
295 } else {
296 simd::v128::format_string(value, dst)
297 }
298 }
299
300 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
301 {
302 #[cfg(feature = "avx512")]
303 {
304 if is_x86_feature_detected!("avx512f") {
305 return unsafe { simd::avx512::format_string(value, dst) };
306 }
307 }
308 if is_x86_feature_detected!("avx2") {
309 unsafe { simd::avx2::format_string(value, dst) }
310 } else if is_x86_feature_detected!("sse2") {
311 unsafe { simd::sse2::format_string(value, dst) }
312 } else {
313 simd::v128::format_string(value, dst)
314 }
315 }
316
317 #[cfg(not(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64")))]
318 {
319 simd::v128::format_string(value, dst)
320 }
321}
322
323pub fn escape(value: &str) -> String {
324 let capacity = value.len() * 6 + 32 + 3;
325 let mut buf = Vec::with_capacity(capacity);
326 #[allow(clippy::uninit_vec)]
327 unsafe {
328 buf.set_len(capacity)
329 };
330 let cnt = format_string(value, &mut buf);
331 unsafe { buf.set_len(cnt) };
332 unsafe { String::from_utf8_unchecked(buf) }
333}
334
335pub fn escape_into<S: AsRef<str>>(value: S, dst: &mut Vec<u8>) {
339 let value = value.as_ref();
340 let old_len = dst.len();
341
342 unsafe {
345 let spare =
347 std::slice::from_raw_parts_mut(dst.as_mut_ptr().add(old_len), dst.capacity() - old_len);
348 let cnt = format_string(value, spare);
349 dst.set_len(old_len + cnt);
350 }
351}
352
#[cfg(test)]
mod tests {
    use std::fs::read_dir;
    use std::path::Path;

    use rand::seq::SliceRandom;

    use super::*;

    /// Asserts that `escape` produces exactly what `serde_json::to_string`
    /// produces for `input`.
    #[track_caller]
    fn assert_matches_serde(input: &str) {
        assert_eq!(escape(input), serde_json::to_string(input).unwrap());
    }

    /// Asserts that both `escape` and `escape_into` agree with
    /// `serde_json::to_string` for `source`.
    #[track_caller]
    fn assert_both_apis_match_serde(source: &str) {
        let expected = serde_json::to_string(source).unwrap();
        assert_eq!(escape(source), expected);

        // Pre-size the buffer to the worst-case bound used by `escape` so
        // `escape_into` always has room to write.
        let mut output = String::with_capacity(source.len() * 6 + 32 + 3);
        // SAFETY (assumed): `escape_into` appends valid UTF-8; the comparison
        // below would fail otherwise.
        escape_into(source, unsafe { output.as_mut_vec() });
        assert_eq!(output, expected);
    }

    #[test]
    fn test_escape_ascii_json_string() {
        assert_matches_serde(r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#);
    }

    #[test]
    fn test_escape_json_string() {
        // Every control byte once, then the bytes with dedicated escapes again.
        let mut fixture: String = (0u8..=0x1F).map(char::from).collect();
        fixture.extend(['\t', '\x08', '\x09', '\x0A', '\x0C', '\x0D', '\x22', '\x5C']);
        fixture.push_str("normal string");
        // Multi-byte text (3- and 4-byte UTF-8 sequences). Reconstructed from
        // the mis-decoded bytes that were in the file; any multi-byte text
        // works because the expectation comes from serde_json.
        fixture.push('😊');
        fixture.push_str("中文 English 🚀 \n❓ 𝄞");
        assert_eq!(
            escape(fixture.as_str()),
            serde_json::to_string(fixture.as_str()).unwrap(),
            "fixture: {:?}",
            fixture
        );
    }

    #[test]
    fn test_empty_string() {
        assert_eq!(escape(""), r#""""#);
    }

    #[test]
    fn test_very_small_strings() {
        let cases = [
            ("a", r#""a""#),
            ("ab", r#""ab""#),
            ("hello", r#""hello""#),
            ("hello\n", r#""hello\n""#),
            ("\"", r#""\"""#),
            ("\\", r#""\\""#),
            ("\t", r#""\t""#),
            ("\r\n", r#""\r\n""#),
        ];
        for (input, expected) in cases {
            assert_eq!(escape(input), expected);
        }
    }

    #[test]
    fn test_small_strings_16_bytes() {
        let s16 = "0123456789abcdef";
        assert_eq!(s16.len(), 16);
        assert_matches_serde(s16);

        // Note: this escaped fixture is 15 bytes long, one short of 16.
        let s16_esc = "01234567\t9abcde";
        assert_eq!(s16_esc.len(), 15);
        assert_matches_serde(s16_esc);
    }

    #[test]
    fn test_medium_strings_32_bytes() {
        let s32 = "0123456789abcdef0123456789abcdef";
        assert_eq!(s32.len(), 32);
        assert_matches_serde(s32);

        let s32_esc = "0123456789abcde\"0123456789abcde";
        assert_matches_serde(s32_esc);
    }

    #[test]
    fn test_large_strings_128_bytes() {
        let s128 = "0123456789abcdef".repeat(8);
        assert_eq!(s128.len(), 128);
        assert_matches_serde(&s128);

        // Alternate newline- and quote-terminated chunks.
        let s128_esc: String = (0..8)
            .map(|i| {
                if i % 2 == 0 {
                    "0123456789abcd\n"
                } else {
                    "0123456789abcd\""
                }
            })
            .collect();
        assert_matches_serde(&s128_esc);
    }

    #[test]
    fn test_unaligned_data() {
        for offset in 0..32 {
            // Slice past a variable amount of padding so the input starts at
            // different offsets within its allocation.
            let padded = format!("{}{}", " ".repeat(offset), "test\nstring\"with\\escapes");
            let unaligned = &padded[offset..];
            assert_eq!(
                escape(unaligned),
                serde_json::to_string(unaligned).unwrap(),
                "Failed at offset {}",
                offset
            );
        }
    }

    #[test]
    fn test_sparse_escapes() {
        // One escape at each end of a long run of plain bytes.
        let s = format!("\"{}\\", "a".repeat(500));
        assert_matches_serde(&s);
    }

    #[test]
    fn test_dense_escapes() {
        let s = "\"\\\"\\\"\\\"\\".repeat(50);
        assert_matches_serde(&s);

        let ctrl: String = (0..10).flat_map(|_| (0u8..32).map(char::from)).collect();
        assert_matches_serde(&ctrl);
    }

    #[test]
    fn test_boundary_conditions() {
        for size in 250..260 {
            let plain = "a".repeat(size);
            assert_matches_serde(&plain);

            // Same length, but with the final byte needing an escape.
            let mut with_trailing_quote = "a".repeat(size - 1);
            with_trailing_quote.push('"');
            assert_matches_serde(&with_trailing_quote);
        }
    }

    #[test]
    fn test_all_escape_types() {
        let cases = [
            ("\x00", r#""\u0000""#),
            ("\x08", r#""\b""#),
            ("\x09", r#""\t""#),
            ("\x0A", r#""\n""#),
            ("\x0C", r#""\f""#),
            ("\x0D", r#""\r""#),
            ("\x1F", r#""\u001f""#),
            ("\"", r#""\"""#),
            ("\\", r#""\\""#),
        ];
        for (input, expected) in cases {
            assert_eq!(escape(input), expected);
        }

        // Every control byte must come out as its `QUOTE_TAB` sequence
        // (NUL padding stripped) wrapped in quotes.
        for i in 0u8..32 {
            let input = char::from(i).to_string();
            let (_, raw) = QUOTE_TAB[usize::from(i)];
            let expected = std::str::from_utf8(&raw).unwrap().trim_end_matches('\0');
            assert_eq!(
                escape(&input),
                format!("\"{}\"", expected),
                "Failed for byte 0x{:02x}",
                i
            );
        }
    }

    #[test]
    fn test_mixed_content() {
        // Written with explicit escapes so the whitespace (newlines, the tab
        // after "Tab:") cannot be lost to editor or encoding mangling. The
        // backslash sequences on the last line are literal backslashes.
        // NOTE(review): indentation and symbols are reconstructed from a
        // garbled copy; the exact text does not matter because the
        // expectation comes from serde_json.
        let mixed = "Hello \"World\"!\n    Tab:\tHere\n    Emoji: 😊 Chinese: 中文\n    Math: ∑∫∂ Music: 𝄞\n    Escape: \\\" \\\\ \\n \\r \\t";
        assert_matches_serde(mixed);
    }

    #[test]
    fn test_repeated_patterns() {
        for unit in ["abcd", "a\"b\"", "\t\n"] {
            assert_matches_serde(&unit.repeat(100));
        }
    }

    #[test]
    fn test_rxjs() {
        let mut sources = Vec::new();
        read_dir_recursive("node_modules/rxjs/src", &mut sources, |p| {
            matches!(p.extension().and_then(|e| e.to_str()), Some("ts"))
        })
        .unwrap();
        assert!(!sources.is_empty());
        sources.shuffle(&mut rand::rng());

        // Under Miri only a small random sample is checked.
        let limit = if cfg!(miri) { 10 } else { sources.len() };
        for source in sources.iter().take(limit) {
            assert_both_apis_match_serde(source);
        }
    }

    #[test]
    fn test_sources() {
        for source in load_affine_sources().unwrap() {
            assert_both_apis_match_serde(&source);
        }
    }

    /// Loads every TypeScript/JavaScript file under `fixtures`, shuffled;
    /// under Miri the iterator is capped at ten entries.
    fn load_affine_sources() -> Result<impl Iterator<Item = String>, std::io::Error> {
        let mut sources = Vec::new();
        read_dir_recursive("fixtures", &mut sources, |p| {
            matches!(
                p.extension().and_then(|e| e.to_str()),
                Some("ts") | Some("tsx") | Some("js") | Some("mjs") | Some("cjs")
            )
        })?;
        assert!(!sources.is_empty());
        sources.shuffle(&mut rand::rng());
        let limit = if cfg!(miri) { 10 } else { sources.len() };
        Ok(sources.into_iter().take(limit))
    }

    /// Walks `dir` recursively and appends the contents of every regular file
    /// accepted by `f` to `sources`. Any I/O error aborts the walk.
    fn read_dir_recursive<P: AsRef<Path>, F: Fn(&Path) -> bool + Copy>(
        dir: P,
        sources: &mut Vec<String>,
        f: F,
    ) -> Result<(), std::io::Error> {
        for entry in read_dir(dir)? {
            let path = entry?.path();
            // `fs::metadata` follows symlinks, matching the original lookup.
            let metadata = std::fs::metadata(&path)?;
            if metadata.is_file() && f(&path) {
                sources.push(std::fs::read_to_string(&path)?);
            }
            if metadata.is_dir() {
                read_dir_recursive(&path, sources, f)?;
            }
        }
        Ok(())
    }
}