1use std::{
2 fmt::Debug,
3 hash::Hash,
4 mem::{forget, transmute, ManuallyDrop},
5 ops::Deref,
6};
7
8use debug_unreachable::debug_unreachable;
9
10use crate::{
11 macros::{get_hash, impl_from_alias},
12 tagged_value::TaggedValue,
13 wtf8::Wtf8,
14 Atom, DYNAMIC_TAG, INLINE_TAG, LEN_MASK, LEN_OFFSET, TAG_MASK,
15};
16
/// An atom whose contents are exposed as [`Wtf8`] (see the `Deref` impl in this file).
///
/// The whole value is a single [`TaggedValue`]; the tag bits (masked with `TAG_MASK`)
/// select between the inline representation (`INLINE_TAG`) and the dynamic one
/// (`DYNAMIC_TAG`).
#[repr(transparent)]
pub struct Wtf8Atom {
    // Raw tagged word. Interpreting it requires checking the tag first; see `as_wtf8`.
    pub(crate) unsafe_data: TaggedValue,
}
25
26impl Wtf8Atom {
27 #[inline(always)]
28 pub fn new<S>(s: S) -> Self
29 where
30 Self: From<S>,
31 {
32 Self::from(s)
33 }
34
35 pub fn try_into_atom(self) -> Result<Atom, Wtf8Atom> {
40 if self.as_str().is_some() {
41 let atom = ManuallyDrop::new(self);
42 Ok(Atom {
43 unsafe_data: atom.unsafe_data,
44 })
45 } else {
46 Err(self)
47 }
48 }
49
50 #[inline(always)]
51 fn tag(&self) -> u8 {
52 self.unsafe_data.tag() & TAG_MASK
53 }
54
55 #[inline(always)]
57 fn is_dynamic(&self) -> bool {
58 self.tag() == DYNAMIC_TAG
59 }
60}
61
62impl Default for Wtf8Atom {
63 #[inline(never)]
64 fn default() -> Self {
65 Wtf8Atom::new("")
66 }
67}
68
// SAFETY: NOTE(review): nothing in this file mutates an atom after construction, and the
// dynamic payload is shared through a reference-counted handle (`restore_arc` in `Drop`).
// Presumably that count is atomic — confirm against `crate::dynamic`.
unsafe impl Send for Wtf8Atom {}
71
// SAFETY: NOTE(review): every accessor in this file takes `&self` and only reads the
// tagged word or the bytes it refers to. Sharing across threads is presumably sound as
// long as the dynamic payload is immutable — confirm against `crate::dynamic`.
unsafe impl Sync for Wtf8Atom {}
74
75impl Debug for Wtf8Atom {
76 #[inline]
77 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78 Debug::fmt(&**self, f)
79 }
80}
81
#[cfg(feature = "serde")]
impl serde::ser::Serialize for Wtf8Atom {
    /// Serializes the atom as an ordinary string.
    ///
    /// Because a serialized string cannot carry lone surrogates, the contents are first
    /// rewritten into a "raw" form:
    ///
    /// * a lone surrogate becomes the six characters `\uXXXX` (upper-case hex);
    /// * a backslash directly followed by `u` and four ASCII hex digits gets a second
    ///   backslash (`\\uXXXX`), so it is not mistaken for an escaped code point;
    /// * any other backslash of a run that ends right before `u` or before a lone
    ///   surrogate is written as `\u005C`. Emitting it verbatim would merge with the
    ///   following escape (e.g. `\` + `\uD800` would read back as the literal text
    ///   `\uD800`, and `\\user` would read back as `\user`);
    /// * everything else is copied unchanged.
    ///
    /// All of these forms are understood by the matching `Deserialize` impl.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::ser::Serializer,
    {
        use crate::wtf8::Wtf8;

        /// A backslash spelled as an escaped code point; always decodes to one `\`.
        const ESCAPED_BACKSLASH: &str = "\\u005C";

        /// Rewrites `s` into the raw, valid-UTF-8 form described on `serialize`.
        fn convert_wtf8_to_raw(s: &Wtf8) -> String {
            let mut result = String::new();
            let mut iter = s.code_points().peekable();

            while let Some(code_point) = iter.next() {
                match code_point.to_char() {
                    // Lone surrogate: not representable as a `char`, so escape it.
                    None => {
                        result.push_str(&format!("\\u{:04X}", code_point.to_u32()));
                    }
                    Some('\\') => {
                        // Swallow the whole run of consecutive backslashes so that the
                        // decision below is made once per run (keeps this linear).
                        let mut run = 1usize;
                        while iter.peek().and_then(|cp| cp.to_char()) == Some('\\') {
                            iter.next();
                            run += 1;
                        }

                        match iter.peek().map(|cp| cp.to_char()) {
                            // Run followed by a lone surrogate, which will be written
                            // as `\uXXXX`: every backslash must be escaped, otherwise
                            // the last one would pair up with the escape's backslash.
                            Some(None) => {
                                result.extend(std::iter::repeat(ESCAPED_BACKSLASH).take(run));
                            }
                            Some(Some('u')) => {
                                // All but the last backslash are escaped so they cannot
                                // pair up with the last one in front of the `u`.
                                result.extend(
                                    std::iter::repeat(ESCAPED_BACKSLASH).take(run - 1),
                                );

                                // Does the text read `\uXXXX` with four hex digits?
                                let mut lookahead = iter.clone();
                                lookahead.next(); // skip the 'u'
                                let hex_digits = lookahead
                                    .take(4)
                                    .filter(|cp| {
                                        cp.to_char().map_or(false, |c| c.is_ascii_hexdigit())
                                    })
                                    .count();

                                if hex_digits == 4 {
                                    // Literal text that looks like an escape: double
                                    // the backslash and consume the 'u'.
                                    iter.next();
                                    result.push_str("\\\\u");
                                } else {
                                    result.push('\\');
                                }
                            }
                            // Nothing ambiguous follows: copy the run verbatim.
                            _ => {
                                result.extend(std::iter::repeat('\\').take(run));
                            }
                        }
                    }
                    Some(c) => result.push(c),
                }
            }

            result
        }

        serializer.serialize_str(&convert_wtf8_to_raw(self))
    }
}
156
#[cfg(feature = "serde")]
impl<'de> serde::de::Deserialize<'de> for Wtf8Atom {
    /// Deserializes an atom from a string written by the `Serialize` impl.
    ///
    /// The incoming string is scanned for backslash sequences:
    ///
    /// * `\uXXXX` with exactly four ASCII hex digits becomes the code point `XXXX`
    ///   (this is how lone surrogates are carried);
    /// * `\\u` becomes the literal text `\u`;
    /// * `\\` not followed by `u` stays as two backslashes;
    /// * any other backslash, including a `\u` that is not followed by four hex digits,
    ///   is kept as a literal backslash and scanning resumes right after it.
    ///
    /// # Errors
    ///
    /// Returns whatever error `String::deserialize` reports.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use crate::wtf8::{CodePoint, Wtf8Buf};

        /// Reads exactly four ASCII hex digits from `chars` and returns their value.
        ///
        /// Returns `None` if the input ends early or any of the four characters is not
        /// a hex digit. Unlike `u16::from_str_radix`, a leading `+` is rejected.
        fn parse_hex4(chars: &mut impl Iterator<Item = char>) -> Option<u32> {
            let mut value = 0;
            for _ in 0..4 {
                value = value * 16 + chars.next()?.to_digit(16)?;
            }
            Some(value)
        }

        /// Undoes the raw escaping described on `deserialize`.
        fn convert_wtf8_string_to_wtf8(s: String) -> Wtf8Buf {
            let mut iter = s.chars().peekable();
            let mut result = Wtf8Buf::with_capacity(s.len());

            while let Some(c) = iter.next() {
                if c != '\\' {
                    result.push_char(c);
                    continue;
                }

                match iter.peek().copied() {
                    Some('u') => {
                        // Probe on a copy so that a failed decode consumes nothing:
                        // the characters after `\u` may themselves start an escape.
                        let mut lookahead = iter.clone();
                        lookahead.next(); // skip the 'u'

                        match parse_hex4(&mut lookahead) {
                            Some(code_point) => {
                                // SAFETY: four hex digits encode at most 0xFFFF.
                                // NOTE(review): assumes `from_u32_unchecked` only
                                // requires a value <= 0x10FFFF — confirm in `wtf8`.
                                result.push(unsafe { CodePoint::from_u32_unchecked(code_point) });
                                iter = lookahead;
                            }
                            // Not an escape: keep the backslash; the 'u' and what
                            // follows are handled by the next loop iterations.
                            None => result.push_char('\\'),
                        }
                    }
                    Some('\\') => {
                        iter.next();
                        if iter.peek() == Some(&'u') {
                            // `\\u` is the escaped spelling of the literal text `\u`.
                            iter.next();
                            result.push_char('\\');
                            result.push_char('u');
                        } else {
                            result.push_str("\\\\");
                        }
                    }
                    // Backslash followed by anything else, or at the end of input.
                    _ => result.push_char(c),
                }
            }

            result
        }

        String::deserialize(deserializer).map(|v| convert_wtf8_string_to_wtf8(v).into())
    }
}
243
244impl PartialEq for Wtf8Atom {
245 #[inline]
246 fn eq(&self, other: &Self) -> bool {
247 let unsafe_data = self.unsafe_data;
248 let other_unsafe_data = other.unsafe_data;
249
250 if unsafe_data == other_unsafe_data {
251 return true;
252 }
253
254 let tag = unsafe_data.tag() & TAG_MASK;
255
256 if tag != (other_unsafe_data.tag() & TAG_MASK) {
257 return false;
258 }
259
260 match tag {
261 INLINE_TAG => false,
264 DYNAMIC_TAG => {
265 let this = unsafe { crate::dynamic::deref_from(unsafe_data) };
266 let other = unsafe { crate::dynamic::deref_from(other_unsafe_data) };
267
268 if this.header.header.hash != other.header.header.hash {
269 return false;
270 }
271
272 this.slice == other.slice
273 }
274 _ => unsafe { debug_unreachable!() },
275 }
276 }
277}
278
// Marker impl: declares the `PartialEq` relation above to be a full equivalence.
impl Eq for Wtf8Atom {}
280
281impl Hash for Wtf8Atom {
282 #[inline(always)]
283 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
284 state.write_u64(self.get_hash());
285 }
286}
287
288impl Drop for Wtf8Atom {
289 #[inline(always)]
290 fn drop(&mut self) {
291 if self.is_dynamic() {
292 unsafe { drop(crate::dynamic::restore_arc(self.unsafe_data)) }
293 }
294 }
295}
296
297impl Clone for Wtf8Atom {
298 #[inline(always)]
299 fn clone(&self) -> Self {
300 Self::from_alias(self.unsafe_data)
301 }
302}
303
304impl Deref for Wtf8Atom {
305 type Target = Wtf8;
306
307 #[inline(always)]
308 fn deref(&self) -> &Self::Target {
309 self.as_wtf8()
310 }
311}
312
313impl AsRef<Wtf8> for Wtf8Atom {
314 #[inline(always)]
315 fn as_ref(&self) -> &Wtf8 {
316 self.as_wtf8()
317 }
318}
319
320impl PartialEq<Wtf8> for Wtf8Atom {
321 #[inline]
322 fn eq(&self, other: &Wtf8) -> bool {
323 self.as_wtf8() == other
324 }
325}
326
327impl PartialEq<crate::Atom> for Wtf8Atom {
328 #[inline]
329 fn eq(&self, other: &crate::Atom) -> bool {
330 self.as_str() == Some(other.as_str())
331 }
332}
333
334impl PartialEq<&'_ Wtf8> for Wtf8Atom {
335 #[inline]
336 fn eq(&self, other: &&Wtf8) -> bool {
337 self.as_wtf8() == *other
338 }
339}
340
341impl PartialEq<Wtf8Atom> for Wtf8 {
342 #[inline]
343 fn eq(&self, other: &Wtf8Atom) -> bool {
344 self == other.as_wtf8()
345 }
346}
347
348impl PartialEq<str> for Wtf8Atom {
349 #[inline]
350 fn eq(&self, other: &str) -> bool {
351 matches!(self.as_str(), Some(s) if s == other)
352 }
353}
354
355impl PartialEq<&str> for Wtf8Atom {
356 #[inline]
357 fn eq(&self, other: &&str) -> bool {
358 matches!(self.as_str(), Some(s) if s == *other)
359 }
360}
361
362impl Wtf8Atom {
363 pub(super) fn get_hash(&self) -> u64 {
364 get_hash!(self)
365 }
366
367 fn as_wtf8(&self) -> &Wtf8 {
368 match self.tag() {
369 DYNAMIC_TAG => unsafe {
370 let item = crate::dynamic::deref_from(self.unsafe_data);
371 Wtf8::from_bytes_unchecked(transmute::<&[u8], &'static [u8]>(&item.slice))
372 },
373 INLINE_TAG => {
374 let len = (self.unsafe_data.tag() & LEN_MASK) >> LEN_OFFSET;
375 let src = self.unsafe_data.data();
376 unsafe { Wtf8::from_bytes_unchecked(&src[..(len as usize)]) }
377 }
378 _ => unsafe { debug_unreachable!() },
379 }
380 }
381}
382
// Expands the crate's `impl_from_alias!` macro for `Wtf8Atom`.
// NOTE(review): presumably this is what provides `Wtf8Atom::from_alias`, used by `Clone`
// above — confirm in `crate::macros`.
impl_from_alias!(Wtf8Atom);
384
#[cfg(test)]
impl Wtf8Atom {
    /// Test helper: strong count of the shared handle for dynamic atoms, `1` otherwise.
    pub(crate) fn ref_count(&self) -> usize {
        if self.tag() != DYNAMIC_TAG {
            return 1;
        }

        let entry = unsafe { crate::dynamic::deref_from(self.unsafe_data) };
        triomphe::ThinArc::strong_count(&entry.0)
    }
}
398
// Unit tests for the serde encoding of `Wtf8Atom` and for `try_into_atom`.
//
// Reading aid for the string literals below: there are two escaping layers. The Rust
// literal is unescaped by the compiler into JSON text, and JSON then unescapes that
// text into the "raw" string handled by the `Serialize` / `Deserialize` impls.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::wtf8::{CodePoint, Wtf8Buf};

    // Plain text is serialized as an ordinary JSON string.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serialize_normal_utf8() {
        let atom = Wtf8Atom::new("Hello, world!");
        let serialized = serde_json::to_string(&atom).unwrap();
        assert_eq!(serialized, "\"Hello, world!\"");
    }

    // Plain JSON text deserializes into an atom with a string view.
    #[cfg(feature = "serde")]
    #[test]
    fn test_deserialize_normal_utf8() {
        let json = "\"Hello, world!\"";
        let atom: Wtf8Atom = serde_json::from_str(json).unwrap();
        assert_eq!(atom.as_str(), Some("Hello, world!"));
    }

    // A lone high surrogate is written as the raw text `\uD800`, which JSON then
    // escapes to `\\uD800`.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serialize_unpaired_high_surrogate() {
        let mut wtf8 = Wtf8Buf::new();
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xd800) });
        let atom = Wtf8Atom::from(wtf8);

        let serialized = serde_json::to_string(&atom).unwrap();
        assert_eq!(serialized, "\"\\\\uD800\"");
    }

    // Same as above for a lone low surrogate.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serialize_unpaired_low_surrogate() {
        let mut wtf8 = Wtf8Buf::new();
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xdc00) });
        let atom = Wtf8Atom::from(wtf8);

        let serialized = serde_json::to_string(&atom).unwrap();
        assert_eq!(serialized, "\"\\\\uDC00\"");
    }

    // Surrogates embedded in ordinary text are escaped individually.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serialize_multiple_surrogates() {
        let mut wtf8 = Wtf8Buf::new();
        wtf8.push_str("Hello ");
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xd800) });
        wtf8.push_str(" World ");
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xdc00) });
        let atom = Wtf8Atom::from(wtf8);

        let serialized = serde_json::to_string(&atom).unwrap();
        assert_eq!(serialized, "\"Hello \\\\uD800 World \\\\uDC00\"");
    }

    // The literal text `\u0041` gets a doubled backslash in the raw form (`\\u0041`),
    // which JSON escapes to four backslashes.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serialize_literal_backslash_u() {
        let atom = Wtf8Atom::new("\\u0041");
        let serialized = serde_json::to_string(&atom).unwrap();
        assert_eq!(serialized, "\"\\\\\\\\u0041\"");
    }

    // The JSON text `"\\uD800"` decodes to the raw string `\uD800`, which the
    // deserializer turns into a lone surrogate (so there is no `&str` view).
    // NOTE(review): the body is identical to `test_deserialize_unpaired_surrogates`.
    #[cfg(feature = "serde")]
    #[test]
    fn test_deserialize_escaped_backslash_u() {
        let json = "\"\\\\uD800\"";
        let atom: Wtf8Atom = serde_json::from_str(json).unwrap();
        assert_eq!(atom.as_str(), None);
        assert_eq!(atom.to_string_lossy(), "\u{FFFD}");
    }

    // An escaped lone surrogate deserializes into a non-UTF-8 atom.
    #[cfg(feature = "serde")]
    #[test]
    fn test_deserialize_unpaired_surrogates() {
        let json = "\"\\\\uD800\"";
        let atom: Wtf8Atom = serde_json::from_str(json).unwrap();
        assert_eq!(atom.as_str(), None);
        assert_eq!(atom.to_string_lossy(), "\u{FFFD}");
    }

    // Non-ASCII text without surrogates survives a serialize/deserialize round trip.
    #[cfg(feature = "serde")]
    #[test]
    fn test_round_trip_normal_string() {
        let original = Wtf8Atom::new("Hello, δΈη! π");
        let serialized = serde_json::to_string(&original).unwrap();
        let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();
        assert_eq!(original.as_str(), deserialized.as_str());
    }

    // Text mixed with lone surrogates survives a round trip.
    #[cfg(feature = "serde")]
    #[test]
    fn test_round_trip_unpaired_surrogates() {
        let mut wtf8 = Wtf8Buf::new();
        wtf8.push_str("Before ");
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xd800) });
        wtf8.push_str(" Middle ");
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xdc00) });
        wtf8.push_str(" After");
        let original = Wtf8Atom::from(wtf8);

        let serialized = serde_json::to_string(&original).unwrap();
        let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();

        assert_eq!(original, deserialized);

        assert_eq!(original.to_string_lossy(), deserialized.to_string_lossy());
    }

    // A high and a low surrogate separated by text (so they never form a pair)
    // survive a round trip together with non-ASCII text.
    #[cfg(feature = "serde")]
    #[test]
    fn test_round_trip_mixed_content() {
        let mut wtf8 = Wtf8Buf::new();
        wtf8.push_str("Hello δΈη π ");
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xd83d) });
        wtf8.push_str(" test ");
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xdca9) });
        let original = Wtf8Atom::from(wtf8);

        let serialized = serde_json::to_string(&original).unwrap();
        let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();

        assert_eq!(original, deserialized);
    }

    // The empty string maps to the empty JSON string in both directions.
    #[cfg(feature = "serde")]
    #[test]
    fn test_empty_string() {
        let atom = Wtf8Atom::new("");
        let serialized = serde_json::to_string(&atom).unwrap();
        assert_eq!(serialized, "\"\"");

        let deserialized: Wtf8Atom = serde_json::from_str("\"\"").unwrap();
        assert_eq!(deserialized.as_str(), Some(""));
    }

    // Characters that JSON itself escapes (quote, control characters, backslash) are
    // left to JSON; the raw conversion does not touch them.
    #[cfg(feature = "serde")]
    #[test]
    fn test_special_characters() {
        // (input, expected JSON text)
        let test_cases = vec![
            ("\"", "\"\\\"\""),
            ("\n\r\t", "\"\\n\\r\\t\""),
            ("\\", "\"\\\\\""),
            ("/", "\"/\""),
        ];

        for (input, expected) in test_cases {
            let atom = Wtf8Atom::new(input);
            let serialized = serde_json::to_string(&atom).unwrap();
            assert_eq!(serialized, expected, "Failed for input: {input:?}");

            let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();
            assert_eq!(deserialized.as_str(), Some(input));
        }
    }

    // Two identical high surrogates in a row stay two separate escapes.
    #[cfg(feature = "serde")]
    #[test]
    fn test_consecutive_surrogates_not_paired() {
        let mut wtf8 = Wtf8Buf::new();
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xd800) });
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xd800) });
        let atom = Wtf8Atom::from(wtf8);

        let serialized = serde_json::to_string(&atom).unwrap();
        assert_eq!(serialized, "\"\\\\uD800\\\\uD800\"");

        let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();
        assert_eq!(atom, deserialized);
    }

    // JSON text `"\\\\u123"` decodes to the raw string `\\u123`; the doubled backslash
    // marks a literal `\u`, so the result is the text `\u123`.
    #[cfg(feature = "serde")]
    #[test]
    fn test_deserialize_incomplete_escape() {
        let json = "\"\\\\\\\\u123\"";
        let atom: Wtf8Atom = serde_json::from_str(json).unwrap();
        assert_eq!(atom.as_str(), Some("\\u123"));
    }

    // Same as above with non-hex characters after the `u`.
    #[cfg(feature = "serde")]
    #[test]
    fn test_deserialize_invalid_hex() {
        let json = "\"\\\\\\\\uGGGG\"";
        let atom: Wtf8Atom = serde_json::from_str(json).unwrap();
        assert_eq!(atom.as_str(), Some("\\uGGGG"));
    }

    // Valid UTF-8 contents convert into an `Atom`.
    #[test]
    fn test_try_into_atom_valid_utf8() {
        let wtf8_atom = Wtf8Atom::new("Valid UTF-8 string");
        let result = wtf8_atom.try_into_atom();
        assert!(result.is_ok());
        assert_eq!(result.unwrap().as_str(), "Valid UTF-8 string");
    }

    // A lone surrogate makes the conversion fail and hands the atom back.
    #[test]
    fn test_try_into_atom_invalid_utf8() {
        let mut wtf8 = Wtf8Buf::new();
        wtf8.push(unsafe { CodePoint::from_u32_unchecked(0xd800) });
        let wtf8_atom = Wtf8Atom::from(wtf8);

        let result = wtf8_atom.try_into_atom();
        assert!(result.is_err());
        let err_atom = result.unwrap_err();
        assert_eq!(err_atom.to_string_lossy(), "\u{FFFD}");
    }

    // Regression test: in a Windows path, `\u` followed by non-hex text (`\util`) must
    // not receive an extra backslash.
    #[cfg(feature = "serde")]
    #[test]
    fn test_backslash_util_issue_11214() {
        let atom =
            Wtf8Atom::from("C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts");
        let serialized = serde_json::to_string(&atom).unwrap();

        assert!(
            !serialized.contains("spec\\\\\\\\util"),
            "Found quadruple backslashes in spec segment! Serialized: {serialized}"
        );

        assert!(
            serialized.contains("spec\\\\util"),
            "Expected double backslashes in spec segment not found! Serialized: {serialized}",
        );

        // Every path separator is escaped exactly once, by JSON only.
        let expected = r#""C:\\github\\swc-plugin-coverage-instrument\\spec\\util\\verifier.ts""#;
        assert_eq!(
            serialized, expected,
            "Serialized value should have consistent backslash escaping"
        );

        let deserialized: Wtf8Atom = serde_json::from_str(&serialized).unwrap();
        assert_eq!(atom, deserialized);
    }
}