1#![deny(
9 unsafe_op_in_unsafe_fn,
10 clippy::undocumented_unsafe_blocks,
11 clippy::missing_safety_doc
12)]
13#![allow(clippy::module_name_repetitions)]
14
15mod builder;
16mod common;
17mod display;
18mod iter;
19mod str;
20
21#[cfg(test)]
22mod tests;
23
24use self::{iter::Windows, str::JsSliceIndex};
25use crate::display::{JsStrDisplayEscaped, JsStrDisplayLossy};
26#[doc(inline)]
27pub use crate::{
28 builder::{CommonJsStringBuilder, Latin1JsStringBuilder, Utf16JsStringBuilder},
29 common::StaticJsStrings,
30 iter::Iter,
31 str::{JsStr, JsStrVariant},
32};
33use std::fmt::Write;
34use std::{
35 alloc::{Layout, alloc, dealloc},
36 cell::Cell,
37 convert::Infallible,
38 hash::{Hash, Hasher},
39 process::abort,
40 ptr::{self, NonNull},
41 str::FromStr,
42};
43use std::{borrow::Cow, mem::ManuallyDrop};
44
/// Aborts the current operation because a string length computation overflowed.
///
/// This is the shared failure path for allocation-size arithmetic in this file; it never
/// returns.
///
/// # Panics
///
/// Always panics with the message `detected overflow during string allocation`.
// Marked `#[cold]` so the optimizer lays out callers for the (overwhelmingly common)
// non-overflowing path.
#[cold]
fn alloc_overflow() -> ! {
    panic!("detected overflow during string allocation")
}
48
/// Returns `true` if `c` is one of the code points treated as trimmable whitespace.
///
/// The accepted set is the union of the three groups named below; every other character
/// yields `false`.
pub(crate) const fn is_trimmable_whitespace(c: char) -> bool {
    // TAB, VT, FF, SPACE, NO-BREAK SPACE and ZERO WIDTH NO-BREAK SPACE.
    let white_space = matches!(
        c,
        '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}'
    );

    // Remaining space characters: OGHAM SPACE MARK, EN QUAD..=HAIR SPACE,
    // NARROW NO-BREAK SPACE, MEDIUM MATHEMATICAL SPACE and IDEOGRAPHIC SPACE.
    let space_separator = matches!(
        c,
        '\u{1680}' | '\u{2000}'..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}'
    );

    // LF, CR, LINE SEPARATOR and PARAGRAPH SEPARATOR.
    let line_terminator = matches!(c, '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}');

    white_space || space_separator || line_terminator
}
68
/// Returns `true` if the byte `c` is one of the byte values treated as trimmable whitespace.
///
/// This is the single-byte counterpart of [`is_trimmable_whitespace`]: it accepts exactly
/// the members of that set whose code point fits in one byte.
pub(crate) const fn is_trimmable_whitespace_latin1(c: u8) -> bool {
    // TAB, VT, FF, SPACE and NO-BREAK SPACE.
    let white_space = matches!(c, 0x09 | 0x0B | 0x0C | 0x20 | 0xA0);

    // LF and CR.
    let line_terminator = matches!(c, 0x0A | 0x0D);

    white_space || line_terminator
}
85
/// A single element produced when decoding a string as UTF-16: either a valid Unicode
/// scalar value or a surrogate code unit that has no partner.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CodePoint {
    /// A valid Unicode scalar value.
    Unicode(char),

    /// A lone surrogate code unit, kept as its raw 16-bit value.
    UnpairedSurrogate(u16),
}

impl CodePoint {
    /// Returns the number of UTF-16 code units needed to encode this code point.
    ///
    /// An unpaired surrogate always occupies exactly one code unit.
    #[inline]
    #[must_use]
    pub const fn code_unit_count(self) -> usize {
        match self {
            Self::Unicode(c) => c.len_utf16(),
            Self::UnpairedSurrogate(_) => 1,
        }
    }

    /// Returns the numeric value of this code point as a `u32`.
    ///
    /// This is a `const fn`, like the other accessors on this type, so it can be used in
    /// constant contexts.
    #[inline]
    #[must_use]
    pub const fn as_u32(self) -> u32 {
        // `as` is used instead of `u32::from` because `From::from` cannot be called in a
        // `const fn`; both casts are lossless widenings.
        match self {
            Self::Unicode(c) => c as u32,
            Self::UnpairedSurrogate(surr) => surr as u32,
        }
    }

    /// Returns the contained `char`, or `None` for an unpaired surrogate.
    #[inline]
    #[must_use]
    pub const fn as_char(self) -> Option<char> {
        match self {
            Self::Unicode(c) => Some(c),
            Self::UnpairedSurrogate(_) => None,
        }
    }

    /// Encodes this code point as UTF-16 into `dst` and returns the sub-slice that was
    /// written.
    ///
    /// # Panics
    ///
    /// Panics if `dst` is too small: it needs [`CodePoint::code_unit_count`] elements
    /// (one for an unpaired surrogate, one or two for a `char`).
    #[inline]
    #[must_use]
    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
        match self {
            Self::Unicode(c) => c.encode_utf16(dst),
            Self::UnpairedSurrogate(surr) => {
                dst[0] = surr;
                &mut dst[0..=0]
            }
        }
    }
}

impl std::fmt::Display for CodePoint {
    /// Writes a `char` as itself and an unpaired surrogate as `\u` followed by four
    /// uppercase hexadecimal digits.
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            CodePoint::Unicode(c) => f.write_char(*c),
            CodePoint::UnpairedSurrogate(c) => {
                write!(f, "\\u{c:04X}")
            }
        }
    }
}
162
/// A length and an encoding flag packed into a single `usize`.
///
/// The lowest bit records whether the string is Latin-1; the remaining bits hold the
/// length, shifted left by [`TaggedLen::BITFLAG_COUNT`].
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
struct TaggedLen(usize);

impl TaggedLen {
    /// Mask of the bit that marks a Latin-1 string.
    const LATIN1_BITFLAG: usize = 1 << 0;
    /// Number of low bits reserved for flags.
    const BITFLAG_COUNT: usize = 1;

    /// Packs `len` and the `latin1` flag together.
    const fn new(len: usize, latin1: bool) -> Self {
        let flag = if latin1 { Self::LATIN1_BITFLAG } else { 0 };
        Self((len << Self::BITFLAG_COUNT) | flag)
    }

    /// Returns `true` if the Latin-1 flag bit is set.
    const fn is_latin1(self) -> bool {
        let Self(raw) = self;
        raw & Self::LATIN1_BITFLAG == Self::LATIN1_BITFLAG
    }

    /// Returns the stored length with the flag bits stripped.
    const fn len(self) -> usize {
        let Self(raw) = self;
        raw >> Self::BITFLAG_COUNT
    }
}
190
/// The header stored at the start of every heap-allocated string.
///
/// `#[repr(C)]` keeps the fields in declaration order, so the zero-sized `data` field marks
/// the address right after the header; the code in this file uses that address as the start
/// of the string's elements.
#[repr(C)]
#[allow(missing_debug_implementations)]
pub struct RawJsString {
    /// Element count together with the Latin-1 flag.
    tagged_len: TaggedLen,
    /// Number of `JsString` handles currently sharing this allocation.
    refcount: Cell<usize>,
    /// Zero-sized marker for where the string data begins.
    data: [u8; 0],
}

impl RawJsString {
    /// Returns `true` if the Latin-1 flag is set in the header.
    const fn is_latin1(&self) -> bool {
        self.tagged_len.is_latin1()
    }

    /// Returns the length recorded in the header.
    const fn len(&self) -> usize {
        self.tagged_len.len()
    }
}
209
/// Size of the [`RawJsString`] header in bytes.
///
/// `JsString::try_allocate_inner` debug-asserts that the offset of the string data in the
/// computed layout equals this value.
const DATA_OFFSET: usize = size_of::<RawJsString>();
211
/// The two things a [`JsString`] pointer can refer to once its tag has been inspected.
enum Unwrapped<'a> {
    /// An untagged pointer to a heap-allocated [`RawJsString`] header.
    Heap(NonNull<RawJsString>),
    /// A reference to a static [`JsStr`], recovered from a tagged pointer.
    Static(&'a JsStr<'static>),
}
216
/// A reference-counted string handle.
///
/// The single pointer either refers to a heap-allocated [`RawJsString`] or, when its lowest
/// bit is set, to a static [`JsStr`] (see [`JsString::from_static_js_str`] and
/// [`JsString::is_static`]).
#[allow(clippy::module_name_repetitions)]
pub struct JsString {
    /// Heap pointer, or a static `JsStr` address with the lowest bit set as a tag.
    ptr: NonNull<RawJsString>,
}

// `JsString` must stay exactly one pointer wide.
static_assertions::assert_eq_size!(JsString, *const ());
234
235impl<'a> From<&'a JsString> for JsStr<'a> {
236 #[inline]
237 fn from(value: &'a JsString) -> Self {
238 value.as_str()
239 }
240}
241
242impl<'a> IntoIterator for &'a JsString {
243 type IntoIter = Iter<'a>;
244 type Item = u16;
245
246 #[inline]
247 fn into_iter(self) -> Self::IntoIter {
248 self.iter()
249 }
250}
251
// Most methods in this block are thin forwards to the borrowed [`JsStr`] view returned by
// `as_str`; the exact semantics of each forwarded call are defined by `JsStr`.
impl JsString {
    /// Returns an iterator over the `u16` elements of the string.
    #[inline]
    #[must_use]
    pub fn iter(&self) -> Iter<'_> {
        self.as_str().iter()
    }

    /// Returns a [`Windows`] iterator for the given `size`, forwarded to `JsStr::windows`.
    ///
    /// NOTE(review): presumably yields overlapping windows like `slice::windows`; confirm
    /// against `JsStr::windows`.
    #[inline]
    #[must_use]
    pub fn windows(&self, size: usize) -> Windows<'_> {
        self.as_str().windows(size)
    }

    /// Renders the string into a `String` using the [`JsString::display_escaped`] adapter.
    #[inline]
    #[must_use]
    pub fn to_std_string_escaped(&self) -> String {
        self.display_escaped().to_string()
    }

    /// Renders the string into a `String` using the [`JsString::display_lossy`] adapter.
    #[inline]
    #[must_use]
    pub fn to_std_string_lossy(&self) -> String {
        self.display_lossy().to_string()
    }

    /// Converts the string into a `String`, forwarded to `JsStr::to_std_string`.
    ///
    /// # Errors
    ///
    /// Returns the [`FromUtf16Error`](std::string::FromUtf16Error) reported by
    /// `JsStr::to_std_string` (presumably when the contents are not valid UTF-16; confirm
    /// there).
    #[inline]
    pub fn to_std_string(&self) -> Result<String, std::string::FromUtf16Error> {
        self.as_str().to_std_string()
    }

    /// Returns an iterator of decoded `String` segments (`Ok`) interleaved with raw `u16`
    /// values that could not be decoded (`Err`), forwarded to
    /// `JsStr::to_std_string_with_surrogates`.
    #[inline]
    pub fn to_std_string_with_surrogates(&self) -> impl Iterator<Item = Result<String, u16>> + '_ {
        self.as_str().to_std_string_with_surrogates()
    }

    /// Builds a new string by applying `f` to every decodable segment of this one.
    ///
    /// Each `Ok` segment from [`JsString::to_std_string_with_surrogates`] is passed through
    /// `f` and re-encoded as UTF-16; each `Err` value is copied through unchanged.
    #[inline]
    #[must_use]
    pub fn map_valid_segments<F>(&self, mut f: F) -> Self
    where
        F: FnMut(String) -> String,
    {
        let mut text = Vec::new();

        for part in self.to_std_string_with_surrogates() {
            match part {
                Ok(string) => text.extend(f(string).encode_utf16()),
                Err(surr) => text.push(surr),
            }
        }

        Self::from(&text[..])
    }

    /// Returns an iterator over the [`CodePoint`]s of the string.
    #[inline]
    pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
        self.as_str().code_points()
    }

    /// Searches for `search_value` starting at `from_index`, forwarded to `JsStr::index_of`.
    ///
    /// Returns the position reported by `JsStr::index_of`, or `None` when it reports no
    /// match.
    #[inline]
    #[must_use]
    pub fn index_of(&self, search_value: JsStr<'_>, from_index: usize) -> Option<usize> {
        self.as_str().index_of(search_value, from_index)
    }

    /// Returns the [`CodePoint`] at `position`, forwarded to `JsStr::code_point_at`.
    ///
    /// NOTE(review): behaviour for an out-of-range `position` is decided by
    /// `JsStr::code_point_at` (it may panic); confirm there.
    #[inline]
    #[must_use]
    pub fn code_point_at(&self, position: usize) -> CodePoint {
        self.as_str().code_point_at(position)
    }

    /// Converts the string to an `f64`, forwarded to `JsStr::to_number`.
    #[inline]
    #[must_use]
    pub fn to_number(&self) -> f64 {
        self.as_str().to_number()
    }

    /// Returns the length of the string as reported by the borrowed [`JsStr`] view.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.as_str().len()
    }

    /// Returns `true` if the string has a length of zero.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Copies the string into a `Vec<u16>`.
    #[inline]
    #[must_use]
    pub fn to_vec(&self) -> Vec<u16> {
        self.as_str().to_vec()
    }

    /// Returns whether the string contains `element`, forwarded to `JsStr::contains`.
    #[inline]
    #[must_use]
    pub fn contains(&self, element: u8) -> bool {
        self.as_str().contains(element)
    }

    /// Returns the borrowed view produced by `JsStr::trim`.
    #[inline]
    #[must_use]
    pub fn trim(&self) -> JsStr<'_> {
        self.as_str().trim()
    }

    /// Returns the borrowed view produced by `JsStr::trim_start`.
    #[inline]
    #[must_use]
    pub fn trim_start(&self) -> JsStr<'_> {
        self.as_str().trim_start()
    }

    /// Returns the borrowed view produced by `JsStr::trim_end`.
    #[inline]
    #[must_use]
    pub fn trim_end(&self) -> JsStr<'_> {
        self.as_str().trim_end()
    }

    /// Looks up `index`, returning `None` when `JsStr::get` does.
    #[inline]
    #[must_use]
    pub fn get<'a, I>(&'a self, index: I) -> Option<I::Value>
    where
        I: JsSliceIndex<'a>,
    {
        self.as_str().get(index)
    }

    /// Looks up `index` without the checks performed by [`JsString::get`].
    ///
    /// # Safety
    ///
    /// The caller must uphold the safety contract of `JsStr::get_unchecked`, which this
    /// method forwards to (presumably that `index` is in bounds; confirm against that
    /// method's documentation).
    #[inline]
    #[must_use]
    pub unsafe fn get_unchecked<'a, I>(&'a self, index: I) -> I::Value
    where
        I: JsSliceIndex<'a>,
    {
        // SAFETY: the caller of this function promises to uphold the contract of
        // `JsStr::get_unchecked`, as required by this function's own safety section.
        unsafe { self.as_str().get_unchecked(index) }
    }

    /// Looks up `index`, forwarded to `JsStr::get_expect`.
    ///
    /// # Panics
    ///
    /// NOTE(review): presumably panics when `index` is out of bounds, as the name suggests;
    /// confirm against `JsStr::get_expect`.
    #[inline]
    #[must_use]
    pub fn get_expect<'a, I>(&'a self, index: I) -> I::Value
    where
        I: JsSliceIndex<'a>,
    {
        self.as_str().get_expect(index)
    }

    /// Returns the [`JsStrDisplayEscaped`] display adapter for this string.
    #[inline]
    #[must_use]
    pub fn display_escaped(&self) -> JsStrDisplayEscaped<'_> {
        self.as_str().display_escaped()
    }

    /// Returns the [`JsStrDisplayLossy`] display adapter for this string.
    #[inline]
    #[must_use]
    pub fn display_lossy(&self) -> JsStrDisplayLossy<'_> {
        self.as_str().display_lossy()
    }

    /// Consumes the string and returns its raw pointer without running `Drop`.
    ///
    /// Because the destructor is skipped, the reference count is left untouched; pass the
    /// pointer back to [`JsString::from_raw`] to avoid leaking a heap allocation.
    #[inline]
    #[must_use]
    pub fn into_raw(self) -> NonNull<RawJsString> {
        ManuallyDrop::new(self).ptr
    }

    /// Rebuilds a `JsString` from a raw pointer without touching the reference count.
    ///
    /// # Safety
    ///
    /// `ptr` must be a pointer previously returned by [`JsString::into_raw`], and each such
    /// pointer must be turned back into a `JsString` at most once: the returned value's
    /// destructor decrements the reference count and may free the allocation `ptr` refers
    /// to.
    #[inline]
    #[must_use]
    pub unsafe fn from_raw(ptr: NonNull<RawJsString>) -> Self {
        Self { ptr }
    }
}
505
506static_assertions::const_assert!(align_of::<*const JsStr<'static>>() >= 2);
508
509impl JsString {
510 #[must_use]
512 pub const fn from_static_js_str(src: &'static JsStr<'static>) -> Self {
513 let src = ptr::from_ref(src);
514
515 let ptr = unsafe { NonNull::new_unchecked(src.cast_mut()) };
519
520 let tagged_ptr = unsafe { ptr.byte_add(1) };
524
525 JsString {
526 ptr: tagged_ptr.cast::<RawJsString>(),
527 }
528 }
529
530 #[inline]
532 #[must_use]
533 pub fn is_static(&self) -> bool {
534 self.ptr.addr().get() & 1 != 0
535 }
536
537 pub(crate) fn unwrap(&self) -> Unwrapped<'_> {
538 if self.is_static() {
539 let ptr = unsafe { self.ptr.byte_sub(1) };
541
542 Unwrapped::Static(unsafe { ptr.cast::<JsStr<'static>>().as_ref() })
544 } else {
545 Unwrapped::Heap(self.ptr)
546 }
547 }
548
549 #[inline]
551 #[must_use]
552 pub fn as_str(&self) -> JsStr<'_> {
553 let ptr = match self.unwrap() {
554 Unwrapped::Heap(ptr) => ptr.as_ptr(),
555 Unwrapped::Static(js_str) => return *js_str,
556 };
557
558 unsafe {
562 let tagged_len = (*ptr).tagged_len;
563 let len = tagged_len.len();
564 let is_latin1 = tagged_len.is_latin1();
565 let ptr = (&raw const (*ptr).data).cast::<u8>();
566
567 if is_latin1 {
568 JsStr::latin1(std::slice::from_raw_parts(ptr, len))
569 } else {
570 #[allow(clippy::cast_ptr_alignment)]
572 JsStr::utf16(std::slice::from_raw_parts(ptr.cast::<u16>(), len))
573 }
574 }
575 }
576
577 #[inline]
579 #[must_use]
580 pub fn concat(x: JsStr<'_>, y: JsStr<'_>) -> Self {
581 Self::concat_array(&[x, y])
582 }
583
584 #[inline]
587 #[must_use]
588 pub fn concat_array(strings: &[JsStr<'_>]) -> Self {
589 let mut latin1_encoding = true;
590 let mut full_count = 0usize;
591 for string in strings {
592 let Some(sum) = full_count.checked_add(string.len()) else {
593 alloc_overflow()
594 };
595 if !string.is_latin1() {
596 latin1_encoding = false;
597 }
598 full_count = sum;
599 }
600
601 let ptr = Self::allocate_inner(full_count, latin1_encoding);
602
603 let string = {
604 let mut data = unsafe { (&raw mut (*ptr.as_ptr()).data).cast::<u8>() };
606 for &string in strings {
607 unsafe {
618 #[allow(clippy::cast_ptr_alignment)]
620 match (latin1_encoding, string.variant()) {
621 (true, JsStrVariant::Latin1(s)) => {
622 let count = s.len();
623 ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
624 data = data.cast::<u8>().add(count).cast::<u8>();
625 }
626 (false, JsStrVariant::Latin1(s)) => {
627 let count = s.len();
628 for (i, byte) in s.iter().enumerate() {
629 *data.cast::<u16>().add(i) = u16::from(*byte);
630 }
631 data = data.cast::<u16>().add(count).cast::<u8>();
632 }
633 (false, JsStrVariant::Utf16(s)) => {
634 let count = s.len();
635 ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
636 data = data.cast::<u16>().add(count).cast::<u8>();
637 }
638 (true, JsStrVariant::Utf16(_)) => {
639 unreachable!("Already checked that it's latin1 encoding")
640 }
641 }
642 }
643 }
644 Self {
645 ptr: unsafe { NonNull::new_unchecked(ptr.as_ptr()) },
647 }
648 };
649
650 StaticJsStrings::get_string(&string.as_str()).unwrap_or(string)
651 }
652
653 fn allocate_inner(str_len: usize, latin1: bool) -> NonNull<RawJsString> {
659 match Self::try_allocate_inner(str_len, latin1) {
660 Ok(v) => v,
661 Err(None) => alloc_overflow(),
662 Err(Some(layout)) => std::alloc::handle_alloc_error(layout),
663 }
664 }
665
666 fn try_allocate_inner(
675 str_len: usize,
676 latin1: bool,
677 ) -> Result<NonNull<RawJsString>, Option<Layout>> {
678 let (layout, offset) = if latin1 {
679 Layout::array::<u8>(str_len)
680 } else {
681 Layout::array::<u16>(str_len)
682 }
683 .and_then(|arr| Layout::new::<RawJsString>().extend(arr))
684 .map(|(layout, offset)| (layout.pad_to_align(), offset))
685 .map_err(|_| None)?;
686
687 debug_assert_eq!(offset, DATA_OFFSET);
688
689 #[allow(clippy::cast_ptr_alignment)]
690 let inner = unsafe { alloc(layout).cast::<RawJsString>() };
694
695 let inner = NonNull::new(inner).ok_or(Some(layout))?;
699
700 unsafe {
704 inner.as_ptr().write(RawJsString {
706 tagged_len: TaggedLen::new(str_len, latin1),
707 refcount: Cell::new(1),
708 data: [0; 0],
709 });
710 }
711
712 debug_assert!({
713 let inner = inner.as_ptr();
714 unsafe {
722 ptr::eq(
723 inner.cast::<u8>().add(offset).cast(),
724 (*inner).data.as_mut_ptr(),
725 )
726 }
727 });
728
729 Ok(inner)
730 }
731
732 fn from_slice_skip_interning(string: JsStr<'_>) -> Self {
734 let count = string.len();
735 let ptr = Self::allocate_inner(count, string.is_latin1());
736
737 let data = unsafe { (&raw mut (*ptr.as_ptr()).data).cast::<u8>() };
739
740 unsafe {
749 #[allow(clippy::cast_ptr_alignment)]
751 match string.variant() {
752 JsStrVariant::Latin1(s) => {
753 ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
754 }
755 JsStrVariant::Utf16(s) => {
756 ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
757 }
758 }
759 }
760 Self { ptr }
761 }
762
763 fn from_slice(string: JsStr<'_>) -> Self {
765 if let Some(s) = StaticJsStrings::get_string(&string) {
766 return s;
767 }
768 Self::from_slice_skip_interning(string)
769 }
770
771 #[inline]
773 #[must_use]
774 pub fn refcount(&self) -> Option<usize> {
775 if self.is_static() {
776 return None;
777 }
778
779 let rc = unsafe { self.ptr.as_ref().refcount.get() };
782 Some(rc)
783 }
784}
785
786impl Clone for JsString {
787 #[inline]
788 fn clone(&self) -> Self {
789 if self.is_static() {
790 return Self { ptr: self.ptr };
791 }
792
793 let inner = unsafe { self.ptr.as_ref() };
795
796 let strong = inner.refcount.get().wrapping_add(1);
797 if strong == 0 {
798 abort()
799 }
800
801 inner.refcount.set(strong);
802
803 Self { ptr: self.ptr }
804 }
805}
806
impl Default for JsString {
    /// Returns [`StaticJsStrings::EMPTY_STRING`].
    #[inline]
    fn default() -> Self {
        StaticJsStrings::EMPTY_STRING
    }
}
813
814impl Drop for JsString {
815 #[inline]
816 fn drop(&mut self) {
817 if self.is_static() {
820 return;
821 }
822
823 let inner = unsafe { self.ptr.as_ref() };
825
826 inner.refcount.set(inner.refcount.get() - 1);
827 if inner.refcount.get() != 0 {
828 return;
829 }
830
831 let layout = unsafe {
835 if inner.is_latin1() {
836 Layout::for_value(inner)
837 .extend(Layout::array::<u8>(inner.len()).unwrap_unchecked())
838 .unwrap_unchecked()
839 .0
840 .pad_to_align()
841 } else {
842 Layout::for_value(inner)
843 .extend(Layout::array::<u16>(inner.len()).unwrap_unchecked())
844 .unwrap_unchecked()
845 .0
846 .pad_to_align()
847 }
848 };
849
850 unsafe {
854 dealloc(self.ptr.cast().as_ptr(), layout);
855 }
856 }
857}
858
impl std::fmt::Debug for JsString {
    /// Formats the string by forwarding to the `fmt` of its borrowed [`JsStr`] view.
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.as_str().fmt(f)
    }
}
865
// Marker impl: equality between two `JsString`s is the comparison of their `as_str()` views
// implemented by `PartialEq for JsString`.
impl Eq for JsString {}
867
// Generates `From<$ty> for JsString` for each listed numeric type.
//
// Every group has the form `module => ty, ty, ...`; the value is formatted with
// `module::Buffer::new().format(value)` and the resulting text is copied into a new Latin-1
// string. The static string table is not consulted (`from_slice_skip_interning`).
macro_rules! impl_from_number_for_js_string {
    ($($module: ident => $($ty:ty),+)+) => {
        $(
            $(
                impl From<$ty> for JsString {
                    #[inline]
                    fn from(value: $ty) -> Self {
                        JsString::from_slice_skip_interning(JsStr::latin1(
                            $module::Buffer::new().format(value).as_bytes(),
                        ))
                    }
                }
            )+
        )+
    };
}

// Integers are formatted with `itoa`, floating-point numbers with `ryu_js`.
impl_from_number_for_js_string!(
    itoa => i8, i16, i32, i64, i128, u8, u16, u32, u64, u128, isize, usize
    ryu_js => f32, f64
);
889
890impl From<&[u16]> for JsString {
891 #[inline]
892 fn from(s: &[u16]) -> Self {
893 JsString::from_slice(JsStr::utf16(s))
894 }
895}
896
897impl From<&str> for JsString {
898 #[inline]
899 fn from(s: &str) -> Self {
900 if s.is_ascii() {
902 let js_str = JsStr::latin1(s.as_bytes());
903 return StaticJsStrings::get_string(&js_str)
904 .unwrap_or_else(|| JsString::from_slice_skip_interning(js_str));
905 }
906 let s = s.encode_utf16().collect::<Vec<_>>();
907 JsString::from_slice_skip_interning(JsStr::utf16(&s[..]))
908 }
909}
910
911impl From<JsStr<'_>> for JsString {
912 #[inline]
913 fn from(value: JsStr<'_>) -> Self {
914 StaticJsStrings::get_string(&value)
915 .unwrap_or_else(|| JsString::from_slice_skip_interning(value))
916 }
917}
918
919impl From<&[JsString]> for JsString {
920 #[inline]
921 fn from(value: &[JsString]) -> Self {
922 Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])
923 }
924}
925
926impl<const N: usize> From<&[JsString; N]> for JsString {
927 #[inline]
928 fn from(value: &[JsString; N]) -> Self {
929 Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])
930 }
931}
932
impl From<String> for JsString {
    /// Creates a string from an owned `String` by converting its `&str` contents.
    #[inline]
    fn from(s: String) -> Self {
        Self::from(s.as_str())
    }
}
939
940impl<'a> From<Cow<'a, str>> for JsString {
941 #[inline]
942 fn from(s: Cow<'a, str>) -> Self {
943 match s {
944 Cow::Borrowed(s) => s.into(),
945 Cow::Owned(s) => s.into(),
946 }
947 }
948}
949
950impl<const N: usize> From<&[u16; N]> for JsString {
951 #[inline]
952 fn from(s: &[u16; N]) -> Self {
953 Self::from(&s[..])
954 }
955}
956
impl Hash for JsString {
    /// Hashes the borrowed [`JsStr`] view, so a `JsString` hashes the same as its `as_str()`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_str().hash(state);
    }
}
963
impl PartialOrd for JsStr<'_> {
    /// Always returns `Some`, wrapping the ordering produced by `cmp`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
970
impl Ord for JsString {
    /// Orders two strings by comparing their borrowed [`JsStr`] views.
    #[inline]
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.as_str().cmp(&other.as_str())
    }
}
977
impl PartialEq for JsString {
    /// Two strings are equal when their borrowed [`JsStr`] views compare equal.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.as_str() == other.as_str()
    }
}
984
985impl PartialEq<JsString> for [u16] {
986 #[inline]
987 fn eq(&self, other: &JsString) -> bool {
988 if self.len() != other.len() {
989 return false;
990 }
991 for (x, y) in self.iter().copied().zip(other.iter()) {
992 if x != y {
993 return false;
994 }
995 }
996 true
997 }
998}
999
impl<const N: usize> PartialEq<JsString> for [u16; N] {
    /// Compares the array as a slice, reusing `PartialEq<JsString> for [u16]`.
    #[inline]
    fn eq(&self, other: &JsString) -> bool {
        self[..] == *other
    }
}
1006
impl PartialEq<[u16]> for JsString {
    /// Mirror of `PartialEq<JsString> for [u16]`: the operands are swapped and that
    /// comparison is reused.
    #[inline]
    fn eq(&self, other: &[u16]) -> bool {
        other == self
    }
}
1013
impl<const N: usize> PartialEq<[u16; N]> for JsString {
    /// Compares against the array viewed as a slice, reusing `PartialEq<[u16]> for JsString`.
    #[inline]
    fn eq(&self, other: &[u16; N]) -> bool {
        *self == other[..]
    }
}
1020
impl PartialEq<str> for JsString {
    /// Compares the borrowed [`JsStr`] view with `other`, using the `JsStr`-to-`&str`
    /// comparison defined outside this file.
    #[inline]
    fn eq(&self, other: &str) -> bool {
        self.as_str() == other
    }
}
1027
impl PartialEq<&str> for JsString {
    /// Same comparison as `PartialEq<str>`, accepting a `&str` right-hand side so that
    /// `js_string == "literal"` compiles.
    #[inline]
    fn eq(&self, other: &&str) -> bool {
        self.as_str() == *other
    }
}
1034
impl PartialEq<JsString> for str {
    /// Mirror of `PartialEq<str> for JsString`: the operands are swapped and that
    /// comparison is reused.
    #[inline]
    fn eq(&self, other: &JsString) -> bool {
        other == self
    }
}
1041
impl PartialEq<JsStr<'_>> for JsString {
    /// Compares the borrowed view of `self` with `other` using `JsStr`'s own equality.
    #[inline]
    fn eq(&self, other: &JsStr<'_>) -> bool {
        self.as_str() == *other
    }
}
1048
impl PartialEq<JsString> for JsStr<'_> {
    /// Mirror of `PartialEq<JsStr> for JsString`: the operands are swapped and that
    /// comparison is reused.
    #[inline]
    fn eq(&self, other: &JsString) -> bool {
        other == self
    }
}
1055
impl PartialOrd for JsString {
    /// Always returns `Some`, wrapping the total order defined by `Ord for JsString`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
1062
1063impl FromStr for JsString {
1064 type Err = Infallible;
1065
1066 #[inline]
1067 fn from_str(s: &str) -> Result<Self, Self::Err> {
1068 Ok(Self::from(s))
1069 }
1070}