Skip to main content

boa_string/
lib.rs

1//! A Latin1 or UTF-16 encoded, reference counted, immutable string.
2
3// Required per unsafe code standards to ensure every unsafe usage is properly documented.
4// - `unsafe_op_in_unsafe_fn` will be warn-by-default in edition 2024:
5//   https://github.com/rust-lang/rust/issues/71668#issuecomment-1189396860
6// - `undocumented_unsafe_blocks` and `missing_safety_doc` requires a `Safety:` section in the
7//   comment or doc of the unsafe block or function, respectively.
8#![deny(
9    unsafe_op_in_unsafe_fn,
10    clippy::undocumented_unsafe_blocks,
11    clippy::missing_safety_doc
12)]
13#![allow(clippy::module_name_repetitions)]
14
15mod builder;
16mod common;
17mod display;
18mod iter;
19mod str;
20
21#[cfg(test)]
22mod tests;
23
24use self::{iter::Windows, str::JsSliceIndex};
25use crate::display::{JsStrDisplayEscaped, JsStrDisplayLossy};
26#[doc(inline)]
27pub use crate::{
28    builder::{CommonJsStringBuilder, Latin1JsStringBuilder, Utf16JsStringBuilder},
29    common::StaticJsStrings,
30    iter::Iter,
31    str::{JsStr, JsStrVariant},
32};
33use std::fmt::Write;
34use std::{
35    alloc::{Layout, alloc, dealloc},
36    cell::Cell,
37    convert::Infallible,
38    hash::{Hash, Hasher},
39    process::abort,
40    ptr::{self, NonNull},
41    str::FromStr,
42};
43use std::{borrow::Cow, mem::ManuallyDrop};
44
/// Reports that the size of a string allocation could not be represented.
///
/// Marked `#[cold]` because it is only reached on the failure path of the allocation helpers,
/// which keeps the panic machinery out of the hot path.
///
/// # Panics
///
/// Always panics with a message describing the overflow.
#[cold]
fn alloc_overflow() -> ! {
    panic!("detected overflow during string allocation")
}
48
/// Returns `true` if `c` is removed by the ECMAScript trimming operations.
///
/// Rust's own `trim` follows `\p{White_Space}`, which additionally accepts U+0085 (next line)
/// and does not accept U+FEFF (zero width non-breaking space), so the accepted set is spelled
/// out by hand here.
pub(crate) const fn is_trimmable_whitespace(c: char) -> bool {
    // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
    let white_space = matches!(
        c,
        '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}'
    );

    // Unicode Space_Separator category
    let space_separator = matches!(
        c,
        '\u{1680}' | '\u{2000}'..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}'
    );

    // Line terminators: https://tc39.es/ecma262/#sec-line-terminators
    let line_terminator = matches!(c, '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}');

    white_space || space_separator || line_terminator
}
68
/// Returns `true` if the Latin1 code unit `c` is removed by the ECMAScript trimming operations.
///
/// This is the single-byte counterpart of the `char` based check: only the whitespace and line
/// terminator code points that fit in one byte are listed.
pub(crate) const fn is_trimmable_whitespace_latin1(c: u8) -> bool {
    // Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
    let white_space = matches!(c, 0x09 | 0x0B | 0x0C | 0x20 | 0xA0);

    // Line terminators: https://tc39.es/ecma262/#sec-line-terminators
    let line_terminator = matches!(c, 0x0A | 0x0D);

    white_space || line_terminator
}
85
/// Represents a Unicode codepoint within a [`JsString`], which could be a valid
/// '[Unicode scalar value]', or an unpaired surrogate.
///
/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CodePoint {
    /// A valid Unicode scalar value.
    Unicode(char),

    /// A surrogate code unit that is not part of a surrogate pair.
    UnpairedSurrogate(u16),
}

impl CodePoint {
    /// Returns how many UTF-16 code units this code point occupies.
    ///
    /// An unpaired surrogate always occupies exactly one code unit.
    #[inline]
    #[must_use]
    pub const fn code_unit_count(self) -> usize {
        if let Self::Unicode(scalar) = self {
            scalar.len_utf16()
        } else {
            1
        }
    }

    /// Returns the numeric value of the code point as a [`u32`].
    #[inline]
    #[must_use]
    pub fn as_u32(self) -> u32 {
        match self {
            Self::Unicode(scalar) => scalar.into(),
            Self::UnpairedSurrogate(unit) => unit.into(),
        }
    }

    /// Returns the [`char`] for a valid 'Unicode scalar value', or [`None`] for an unpaired
    /// surrogate.
    #[inline]
    #[must_use]
    pub const fn as_char(self) -> Option<char> {
        if let Self::Unicode(scalar) = self {
            Some(scalar)
        } else {
            None
        }
    }

    /// Writes the UTF-16 encoding of this code point at the start of `dst` and returns the
    /// written prefix of the buffer.
    ///
    /// # Panics
    ///
    /// Panics if the buffer is not large enough. A buffer of length 2 is large enough to encode any
    /// code point.
    #[inline]
    #[must_use]
    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
        match self {
            Self::Unicode(scalar) => scalar.encode_utf16(dst),
            Self::UnpairedSurrogate(unit) => {
                // A lone surrogate is stored verbatim as a single code unit.
                dst[0] = unit;
                &mut dst[..1]
            }
        }
    }
}

impl std::fmt::Display for CodePoint {
    /// Writes scalar values as the character itself, and unpaired surrogates as a `\u` escape
    /// with at least four uppercase hexadecimal digits.
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::Unicode(scalar) => f.write_char(scalar),
            Self::UnpairedSurrogate(c) => write!(f, "\\u{c:04X}"),
        }
    }
}
162
/// A `usize` packing the length of a string together with its encoding flag.
/// ```text
/// ┌────────────────────────────────────┐
/// │ length (usize::BITS - 1) │ flag(1) │
/// └────────────────────────────────────┘
/// ```
/// The Latin1/UTF-16 flag lives in the lowest bit; the remaining bits hold the length.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
struct TaggedLen(usize);

impl TaggedLen {
    /// Mask selecting the bit that is set for Latin1 strings.
    const LATIN1_BITFLAG: usize = 1 << 0;
    /// Number of low bits reserved for flags.
    const BITFLAG_COUNT: usize = 1;

    /// Packs `len` and the `latin1` flag into a single word.
    const fn new(len: usize, latin1: bool) -> Self {
        let flag = if latin1 { Self::LATIN1_BITFLAG } else { 0 };
        Self((len << Self::BITFLAG_COUNT) | flag)
    }

    /// Returns `true` if the Latin1 flag bit is set.
    const fn is_latin1(self) -> bool {
        let Self(packed) = self;
        packed & Self::LATIN1_BITFLAG == Self::LATIN1_BITFLAG
    }

    /// Returns the stored length with the flag bit shifted out.
    const fn len(self) -> usize {
        let Self(packed) = self;
        packed >> Self::BITFLAG_COUNT
    }
}
190
191/// The raw representation of a [`JsString`] in the heap.
192#[repr(C)]
193#[allow(missing_debug_implementations)]
194pub struct RawJsString {
195    tagged_len: TaggedLen,
196    refcount: Cell<usize>,
197    data: [u8; 0],
198}
199
200impl RawJsString {
201    const fn is_latin1(&self) -> bool {
202        self.tagged_len.is_latin1()
203    }
204
205    const fn len(&self) -> usize {
206        self.tagged_len.len()
207    }
208}
209
210const DATA_OFFSET: usize = size_of::<RawJsString>();
211
212enum Unwrapped<'a> {
213    Heap(NonNull<RawJsString>),
214    Static(&'a JsStr<'static>),
215}
216
217/// A Latin1 or UTF-16–encoded, reference counted, immutable string.
218///
219/// This is pretty similar to a <code>[Rc][std::rc::Rc]\<[\[u16\]][slice]\></code>, but without the
220/// length metadata associated with the `Rc` fat pointer. Instead, the length of every string is
221/// stored on the heap, along with its reference counter and its data.
222///
223/// The string can be latin1 (stored as a byte for space efficiency) or U16 encoding.
224///
225/// We define some commonly used string constants in an interner. For these strings, we don't allocate
226/// memory on the heap to reduce the overhead of memory allocation and reference counting.
227#[allow(clippy::module_name_repetitions)]
228pub struct JsString {
229    ptr: NonNull<RawJsString>,
230}
231
232// JsString should always be pointer sized.
233static_assertions::assert_eq_size!(JsString, *const ());
234
235impl<'a> From<&'a JsString> for JsStr<'a> {
236    #[inline]
237    fn from(value: &'a JsString) -> Self {
238        value.as_str()
239    }
240}
241
242impl<'a> IntoIterator for &'a JsString {
243    type IntoIter = Iter<'a>;
244    type Item = u16;
245
246    #[inline]
247    fn into_iter(self) -> Self::IntoIter {
248        self.iter()
249    }
250}
251
252impl JsString {
253    /// Create an iterator over the [`JsString`].
254    #[inline]
255    #[must_use]
256    pub fn iter(&self) -> Iter<'_> {
257        self.as_str().iter()
258    }
259
260    /// Create an iterator over overlapping subslices of length size.
261    #[inline]
262    #[must_use]
263    pub fn windows(&self, size: usize) -> Windows<'_> {
264        self.as_str().windows(size)
265    }
266
267    /// Decodes a [`JsString`] into a [`String`], replacing invalid data with its escaped representation
268    /// in 4 digit hexadecimal.
269    #[inline]
270    #[must_use]
271    pub fn to_std_string_escaped(&self) -> String {
272        self.display_escaped().to_string()
273    }
274
275    /// Decodes a [`JsString`] into a [`String`], replacing invalid data with the
276    /// replacement character U+FFFD.
277    #[inline]
278    #[must_use]
279    pub fn to_std_string_lossy(&self) -> String {
280        self.display_lossy().to_string()
281    }
282
283    /// Decodes a [`JsString`] into a [`String`], returning an error if the string contains unpaired
284    /// surrogates.
285    ///
286    /// # Errors
287    ///
288    /// [`FromUtf16Error`][std::string::FromUtf16Error] if it contains any invalid data.
289    #[inline]
290    pub fn to_std_string(&self) -> Result<String, std::string::FromUtf16Error> {
291        self.as_str().to_std_string()
292    }
293
294    /// Decodes a [`JsString`] into an iterator of [`Result<String, u16>`], returning surrogates as
295    /// errors.
296    #[inline]
297    pub fn to_std_string_with_surrogates(&self) -> impl Iterator<Item = Result<String, u16>> + '_ {
298        self.as_str().to_std_string_with_surrogates()
299    }
300
301    /// Maps the valid segments of an UTF16 string and leaves the unpaired surrogates unchanged.
302    #[inline]
303    #[must_use]
304    pub fn map_valid_segments<F>(&self, mut f: F) -> Self
305    where
306        F: FnMut(String) -> String,
307    {
308        let mut text = Vec::new();
309
310        for part in self.to_std_string_with_surrogates() {
311            match part {
312                Ok(string) => text.extend(f(string).encode_utf16()),
313                Err(surr) => text.push(surr),
314            }
315        }
316
317        Self::from(&text[..])
318    }
319
320    /// Gets an iterator of all the Unicode codepoints of a [`JsString`].
321    #[inline]
322    pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
323        self.as_str().code_points()
324    }
325
326    /// Abstract operation `StringIndexOf ( string, searchValue, fromIndex )`
327    ///
328    /// Note: Instead of returning an isize with `-1` as the "not found" value, we make use of the
329    /// type system and return <code>[Option]\<usize\></code> with [`None`] as the "not found" value.
330    ///
331    /// More information:
332    ///  - [ECMAScript reference][spec]
333    ///
334    /// [spec]: https://tc39.es/ecma262/#sec-stringindexof
335    #[inline]
336    #[must_use]
337    pub fn index_of(&self, search_value: JsStr<'_>, from_index: usize) -> Option<usize> {
338        self.as_str().index_of(search_value, from_index)
339    }
340
341    /// Abstract operation `CodePointAt( string, position )`.
342    ///
343    /// The abstract operation `CodePointAt` takes arguments `string` (a String) and `position` (a
344    /// non-negative integer) and returns a Record with fields `[[CodePoint]]` (a code point),
345    /// `[[CodeUnitCount]]` (a positive integer), and `[[IsUnpairedSurrogate]]` (a Boolean). It
346    /// interprets string as a sequence of UTF-16 encoded code points, as described in 6.1.4, and reads
347    /// from it a single code point starting with the code unit at index `position`.
348    ///
349    /// More information:
350    ///  - [ECMAScript reference][spec]
351    ///
352    /// [spec]: https://tc39.es/ecma262/#sec-codepointat
353    ///
354    /// # Panics
355    ///
356    /// If `position` is smaller than size of string.
357    #[inline]
358    #[must_use]
359    pub fn code_point_at(&self, position: usize) -> CodePoint {
360        self.as_str().code_point_at(position)
361    }
362
363    /// Abstract operation `StringToNumber ( str )`
364    ///
365    /// More information:
366    /// - [ECMAScript reference][spec]
367    ///
368    /// [spec]: https://tc39.es/ecma262/#sec-stringtonumber
369    #[inline]
370    #[must_use]
371    pub fn to_number(&self) -> f64 {
372        self.as_str().to_number()
373    }
374
375    /// Get the length of the [`JsString`].
376    #[inline]
377    #[must_use]
378    pub fn len(&self) -> usize {
379        self.as_str().len()
380    }
381
382    /// Return true if the [`JsString`] is emtpy.
383    #[inline]
384    #[must_use]
385    pub fn is_empty(&self) -> bool {
386        self.len() == 0
387    }
388
389    /// Convert the [`JsString`] into a [`Vec<U16>`].
390    #[inline]
391    #[must_use]
392    pub fn to_vec(&self) -> Vec<u16> {
393        self.as_str().to_vec()
394    }
395
396    /// Check if the [`JsString`] contains a byte.
397    #[inline]
398    #[must_use]
399    pub fn contains(&self, element: u8) -> bool {
400        self.as_str().contains(element)
401    }
402
403    /// Trim whitespace from the start and end of the [`JsString`].
404    #[inline]
405    #[must_use]
406    pub fn trim(&self) -> JsStr<'_> {
407        self.as_str().trim()
408    }
409
410    /// Trim whitespace from the start of the [`JsString`].
411    #[inline]
412    #[must_use]
413    pub fn trim_start(&self) -> JsStr<'_> {
414        self.as_str().trim_start()
415    }
416
417    /// Trim whitespace from the end of the [`JsString`].
418    #[inline]
419    #[must_use]
420    pub fn trim_end(&self) -> JsStr<'_> {
421        self.as_str().trim_end()
422    }
423
424    /// Get the element a the given index, [`None`] otherwise.
425    #[inline]
426    #[must_use]
427    pub fn get<'a, I>(&'a self, index: I) -> Option<I::Value>
428    where
429        I: JsSliceIndex<'a>,
430    {
431        self.as_str().get(index)
432    }
433
434    /// Returns an element or subslice depending on the type of index, without doing bounds check.
435    ///
436    /// # Safety
437    ///
438    /// Caller must ensure the index is not out of bounds
439    #[inline]
440    #[must_use]
441    pub unsafe fn get_unchecked<'a, I>(&'a self, index: I) -> I::Value
442    where
443        I: JsSliceIndex<'a>,
444    {
445        // SAFETY: Caller must ensure the index is not out of bounds
446        unsafe { self.as_str().get_unchecked(index) }
447    }
448
449    /// Get the element a the given index.
450    ///
451    /// # Panics
452    ///
453    /// If the index is out of bounds.
454    #[inline]
455    #[must_use]
456    pub fn get_expect<'a, I>(&'a self, index: I) -> I::Value
457    where
458        I: JsSliceIndex<'a>,
459    {
460        self.as_str().get_expect(index)
461    }
462
463    /// Gets a displayable escaped string. This may be faster and has fewer
464    /// allocations than `format!("{}", str.to_string_escaped())` when
465    /// displaying.
466    #[inline]
467    #[must_use]
468    pub fn display_escaped(&self) -> JsStrDisplayEscaped<'_> {
469        self.as_str().display_escaped()
470    }
471
472    /// Gets a displayable lossy string. This may be faster and has fewer
473    /// allocations than `format!("{}", str.to_string_lossy())` when displaying.
474    #[inline]
475    #[must_use]
476    pub fn display_lossy(&self) -> JsStrDisplayLossy<'_> {
477        self.as_str().display_lossy()
478    }
479
480    /// Consumes the [`JsString`], returning a pointer to `RawJsString`.
481    ///
482    /// To avoid a memory leak the pointer must be converted back to a `JsString` using
483    /// [`JsString::from_raw`].
484    #[inline]
485    #[must_use]
486    pub fn into_raw(self) -> NonNull<RawJsString> {
487        ManuallyDrop::new(self).ptr
488    }
489
490    /// Constructs a `JsString` from a pointer to `RawJsString`.
491    ///
492    /// The raw pointer must have been previously returned by a call to
493    /// [`JsString::into_raw`].
494    ///
495    /// # Safety
496    ///
497    /// This function is unsafe because improper use may lead to memory unsafety,
498    /// even if the returned `JsString` is never accessed.
499    #[inline]
500    #[must_use]
501    pub unsafe fn from_raw(ptr: NonNull<RawJsString>) -> Self {
502        Self { ptr }
503    }
504}
505
506// `&JsStr<'static>` must always be aligned so it can be taggged.
507static_assertions::const_assert!(align_of::<*const JsStr<'static>>() >= 2);
508
509impl JsString {
510    /// Create a [`JsString`] from a static js string.
511    #[must_use]
512    pub const fn from_static_js_str(src: &'static JsStr<'static>) -> Self {
513        let src = ptr::from_ref(src);
514
515        // SAFETY: A reference cannot be null, so this is safe.
516        //
517        // TODO: Replace once `NonNull::from_ref()` is stabilized.
518        let ptr = unsafe { NonNull::new_unchecked(src.cast_mut()) };
519
520        // SAFETY:
521        // - Adding one to an aligned pointer will tag the pointer's last bit.
522        // - The pointer's provenance remains unchanged, so this is safe.
523        let tagged_ptr = unsafe { ptr.byte_add(1) };
524
525        JsString {
526            ptr: tagged_ptr.cast::<RawJsString>(),
527        }
528    }
529
530    /// Check if the [`JsString`] is static.
531    #[inline]
532    #[must_use]
533    pub fn is_static(&self) -> bool {
534        self.ptr.addr().get() & 1 != 0
535    }
536
537    pub(crate) fn unwrap(&self) -> Unwrapped<'_> {
538        if self.is_static() {
539            // SAFETY: Static pointer is tagged and already checked, so this is safe.
540            let ptr = unsafe { self.ptr.byte_sub(1) };
541
542            // SAFETY: A static pointer always points to a valid JsStr, so this is safe.
543            Unwrapped::Static(unsafe { ptr.cast::<JsStr<'static>>().as_ref() })
544        } else {
545            Unwrapped::Heap(self.ptr)
546        }
547    }
548
549    /// Obtains the underlying [`&[u16]`][slice] slice of a [`JsString`]
550    #[inline]
551    #[must_use]
552    pub fn as_str(&self) -> JsStr<'_> {
553        let ptr = match self.unwrap() {
554            Unwrapped::Heap(ptr) => ptr.as_ptr(),
555            Unwrapped::Static(js_str) => return *js_str,
556        };
557
558        // SAFETY:
559        // - Unwrapped heap ptr is always a valid heap allocated RawJsString.
560        // - Length of a heap allocated string always contains the correct size of the string.
561        unsafe {
562            let tagged_len = (*ptr).tagged_len;
563            let len = tagged_len.len();
564            let is_latin1 = tagged_len.is_latin1();
565            let ptr = (&raw const (*ptr).data).cast::<u8>();
566
567            if is_latin1 {
568                JsStr::latin1(std::slice::from_raw_parts(ptr, len))
569            } else {
570                // SAFETY: Raw data string is always correctly aligned when allocated.
571                #[allow(clippy::cast_ptr_alignment)]
572                JsStr::utf16(std::slice::from_raw_parts(ptr.cast::<u16>(), len))
573            }
574        }
575    }
576
577    /// Creates a new [`JsString`] from the concatenation of `x` and `y`.
578    #[inline]
579    #[must_use]
580    pub fn concat(x: JsStr<'_>, y: JsStr<'_>) -> Self {
581        Self::concat_array(&[x, y])
582    }
583
584    /// Creates a new [`JsString`] from the concatenation of every element of
585    /// `strings`.
586    #[inline]
587    #[must_use]
588    pub fn concat_array(strings: &[JsStr<'_>]) -> Self {
589        let mut latin1_encoding = true;
590        let mut full_count = 0usize;
591        for string in strings {
592            let Some(sum) = full_count.checked_add(string.len()) else {
593                alloc_overflow()
594            };
595            if !string.is_latin1() {
596                latin1_encoding = false;
597            }
598            full_count = sum;
599        }
600
601        let ptr = Self::allocate_inner(full_count, latin1_encoding);
602
603        let string = {
604            // SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.
605            let mut data = unsafe { (&raw mut (*ptr.as_ptr()).data).cast::<u8>() };
606            for &string in strings {
607                // SAFETY:
608                // The sum of all `count` for each `string` equals `full_count`, and since we're
609                // iteratively writing each of them to `data`, `copy_non_overlapping` always stays
610                // in-bounds for `count` reads of each string and `full_count` writes to `data`.
611                //
612                // Each `string` must be properly aligned to be a valid slice, and `data` must be
613                // properly aligned by `allocate_inner`.
614                //
615                // `allocate_inner` must return a valid pointer to newly allocated memory, meaning
616                // `ptr` and all `string`s should never overlap.
617                unsafe {
618                    // NOTE: The aligment is checked when we allocate the array.
619                    #[allow(clippy::cast_ptr_alignment)]
620                    match (latin1_encoding, string.variant()) {
621                        (true, JsStrVariant::Latin1(s)) => {
622                            let count = s.len();
623                            ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
624                            data = data.cast::<u8>().add(count).cast::<u8>();
625                        }
626                        (false, JsStrVariant::Latin1(s)) => {
627                            let count = s.len();
628                            for (i, byte) in s.iter().enumerate() {
629                                *data.cast::<u16>().add(i) = u16::from(*byte);
630                            }
631                            data = data.cast::<u16>().add(count).cast::<u8>();
632                        }
633                        (false, JsStrVariant::Utf16(s)) => {
634                            let count = s.len();
635                            ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
636                            data = data.cast::<u16>().add(count).cast::<u8>();
637                        }
638                        (true, JsStrVariant::Utf16(_)) => {
639                            unreachable!("Already checked that it's latin1 encoding")
640                        }
641                    }
642                }
643            }
644            Self {
645                // SAFETY: We already know it's a valid heap pointer.
646                ptr: unsafe { NonNull::new_unchecked(ptr.as_ptr()) },
647            }
648        };
649
650        StaticJsStrings::get_string(&string.as_str()).unwrap_or(string)
651    }
652
653    /// Allocates a new [`RawJsString`] with an internal capacity of `str_len` chars.
654    ///
655    /// # Panics
656    ///
657    /// Panics if `try_allocate_inner` returns `Err`.
658    fn allocate_inner(str_len: usize, latin1: bool) -> NonNull<RawJsString> {
659        match Self::try_allocate_inner(str_len, latin1) {
660            Ok(v) => v,
661            Err(None) => alloc_overflow(),
662            Err(Some(layout)) => std::alloc::handle_alloc_error(layout),
663        }
664    }
665
666    // This is marked as safe because it is always valid to call this function to request any number
667    // of `u16`, since this function ought to fail on an OOM error.
668    /// Allocates a new [`RawJsString`] with an internal capacity of `str_len` chars.
669    ///
670    /// # Errors
671    ///
672    /// Returns `Err(None)` on integer overflows `usize::MAX`.
673    /// Returns `Err(Some(Layout))` on allocation error.
674    fn try_allocate_inner(
675        str_len: usize,
676        latin1: bool,
677    ) -> Result<NonNull<RawJsString>, Option<Layout>> {
678        let (layout, offset) = if latin1 {
679            Layout::array::<u8>(str_len)
680        } else {
681            Layout::array::<u16>(str_len)
682        }
683        .and_then(|arr| Layout::new::<RawJsString>().extend(arr))
684        .map(|(layout, offset)| (layout.pad_to_align(), offset))
685        .map_err(|_| None)?;
686
687        debug_assert_eq!(offset, DATA_OFFSET);
688
689        #[allow(clippy::cast_ptr_alignment)]
690        // SAFETY:
691        // The layout size of `RawJsString` is never zero, since it has to store
692        // the length of the string and the reference count.
693        let inner = unsafe { alloc(layout).cast::<RawJsString>() };
694
695        // We need to verify that the pointer returned by `alloc` is not null, otherwise
696        // we should abort, since an allocation error is pretty unrecoverable for us
697        // right now.
698        let inner = NonNull::new(inner).ok_or(Some(layout))?;
699
700        // SAFETY:
701        // `NonNull` verified for us that the pointer returned by `alloc` is valid,
702        // meaning we can write to its pointed memory.
703        unsafe {
704            // Write the first part, the `RawJsString`.
705            inner.as_ptr().write(RawJsString {
706                tagged_len: TaggedLen::new(str_len, latin1),
707                refcount: Cell::new(1),
708                data: [0; 0],
709            });
710        }
711
712        debug_assert!({
713            let inner = inner.as_ptr();
714            // SAFETY:
715            // - `inner` must be a valid pointer, since it comes from a `NonNull`,
716            // meaning we can safely dereference it to `RawJsString`.
717            // - `offset` should point us to the beginning of the array,
718            // and since we requested an `RawJsString` layout with a trailing
719            // `[u16; str_len]`, the memory of the array must be in the `usize`
720            // range for the allocation to succeed.
721            unsafe {
722                ptr::eq(
723                    inner.cast::<u8>().add(offset).cast(),
724                    (*inner).data.as_mut_ptr(),
725                )
726            }
727        });
728
729        Ok(inner)
730    }
731
732    /// Creates a new [`JsString`] from `data`, without checking if the string is in the interner.
733    fn from_slice_skip_interning(string: JsStr<'_>) -> Self {
734        let count = string.len();
735        let ptr = Self::allocate_inner(count, string.is_latin1());
736
737        // SAFETY: `allocate_inner` guarantees that `ptr` is a valid pointer.
738        let data = unsafe { (&raw mut (*ptr.as_ptr()).data).cast::<u8>() };
739
740        // SAFETY:
741        // - We read `count = data.len()` elements from `data`, which is within the bounds of the slice.
742        // - `allocate_inner` must allocate at least `count` elements, which allows us to safely
743        //   write at least `count` elements.
744        // - `allocate_inner` should already take care of the alignment of `ptr`, and `data` must be
745        //   aligned to be a valid slice.
746        // - `allocate_inner` must return a valid pointer to newly allocated memory, meaning `ptr`
747        //   and `data` should never overlap.
748        unsafe {
749            // NOTE: The aligment is checked when we allocate the array.
750            #[allow(clippy::cast_ptr_alignment)]
751            match string.variant() {
752                JsStrVariant::Latin1(s) => {
753                    ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
754                }
755                JsStrVariant::Utf16(s) => {
756                    ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
757                }
758            }
759        }
760        Self { ptr }
761    }
762
763    /// Creates a new [`JsString`] from `data`.
764    fn from_slice(string: JsStr<'_>) -> Self {
765        if let Some(s) = StaticJsStrings::get_string(&string) {
766            return s;
767        }
768        Self::from_slice_skip_interning(string)
769    }
770
771    /// Gets the number of `JsString`s which point to this allocation.
772    #[inline]
773    #[must_use]
774    pub fn refcount(&self) -> Option<usize> {
775        if self.is_static() {
776            return None;
777        }
778
779        // SAFETY:
780        // `NonNull` and the constructions of `JsString` guarantee that `inner` is always valid.
781        let rc = unsafe { self.ptr.as_ref().refcount.get() };
782        Some(rc)
783    }
784}
785
786impl Clone for JsString {
787    #[inline]
788    fn clone(&self) -> Self {
789        if self.is_static() {
790            return Self { ptr: self.ptr };
791        }
792
793        // SAFETY: `NonNull` and the constructions of `JsString` guarantee that `inner` is always valid.
794        let inner = unsafe { self.ptr.as_ref() };
795
796        let strong = inner.refcount.get().wrapping_add(1);
797        if strong == 0 {
798            abort()
799        }
800
801        inner.refcount.set(strong);
802
803        Self { ptr: self.ptr }
804    }
805}
806
807impl Default for JsString {
808    #[inline]
809    fn default() -> Self {
810        StaticJsStrings::EMPTY_STRING
811    }
812}
813
814impl Drop for JsString {
815    #[inline]
816    fn drop(&mut self) {
817        // See https://doc.rust-lang.org/src/alloc/sync.rs.html#1672 for details.
818
819        if self.is_static() {
820            return;
821        }
822
823        // SAFETY: `NonNull` and the constructions of `JsString` guarantees that `raw` is always valid.
824        let inner = unsafe { self.ptr.as_ref() };
825
826        inner.refcount.set(inner.refcount.get() - 1);
827        if inner.refcount.get() != 0 {
828            return;
829        }
830
831        // SAFETY:
832        // All the checks for the validity of the layout have already been made on `alloc_inner`,
833        // so we can skip the unwrap.
834        let layout = unsafe {
835            if inner.is_latin1() {
836                Layout::for_value(inner)
837                    .extend(Layout::array::<u8>(inner.len()).unwrap_unchecked())
838                    .unwrap_unchecked()
839                    .0
840                    .pad_to_align()
841            } else {
842                Layout::for_value(inner)
843                    .extend(Layout::array::<u16>(inner.len()).unwrap_unchecked())
844                    .unwrap_unchecked()
845                    .0
846                    .pad_to_align()
847            }
848        };
849
850        // SAFETY:
851        // If refcount is 0 and we call drop, that means this is the last `JsString` which
852        // points to this memory allocation, so deallocating it is safe.
853        unsafe {
854            dealloc(self.ptr.cast().as_ptr(), layout);
855        }
856    }
857}
858
859impl std::fmt::Debug for JsString {
860    #[inline]
861    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
862        self.as_str().fmt(f)
863    }
864}
865
866impl Eq for JsString {}
867
868macro_rules! impl_from_number_for_js_string {
869    ($($module: ident => $($ty:ty),+)+) => {
870        $(
871            $(
872                impl From<$ty> for JsString {
873                    #[inline]
874                    fn from(value: $ty) -> Self {
875                        JsString::from_slice_skip_interning(JsStr::latin1(
876                            $module::Buffer::new().format(value).as_bytes(),
877                        ))
878                    }
879                }
880            )+
881        )+
882    };
883}
884
885impl_from_number_for_js_string!(
886    itoa => i8, i16, i32, i64, i128, u8, u16, u32, u64, u128, isize, usize
887    ryu_js => f32, f64
888);
889
890impl From<&[u16]> for JsString {
891    #[inline]
892    fn from(s: &[u16]) -> Self {
893        JsString::from_slice(JsStr::utf16(s))
894    }
895}
896
897impl From<&str> for JsString {
898    #[inline]
899    fn from(s: &str) -> Self {
900        // TODO: Check for latin1 encoding
901        if s.is_ascii() {
902            let js_str = JsStr::latin1(s.as_bytes());
903            return StaticJsStrings::get_string(&js_str)
904                .unwrap_or_else(|| JsString::from_slice_skip_interning(js_str));
905        }
906        let s = s.encode_utf16().collect::<Vec<_>>();
907        JsString::from_slice_skip_interning(JsStr::utf16(&s[..]))
908    }
909}
910
911impl From<JsStr<'_>> for JsString {
912    #[inline]
913    fn from(value: JsStr<'_>) -> Self {
914        StaticJsStrings::get_string(&value)
915            .unwrap_or_else(|| JsString::from_slice_skip_interning(value))
916    }
917}
918
919impl From<&[JsString]> for JsString {
920    #[inline]
921    fn from(value: &[JsString]) -> Self {
922        Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])
923    }
924}
925
926impl<const N: usize> From<&[JsString; N]> for JsString {
927    #[inline]
928    fn from(value: &[JsString; N]) -> Self {
929        Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])
930    }
931}
932
933impl From<String> for JsString {
934    #[inline]
935    fn from(s: String) -> Self {
936        Self::from(s.as_str())
937    }
938}
939
940impl<'a> From<Cow<'a, str>> for JsString {
941    #[inline]
942    fn from(s: Cow<'a, str>) -> Self {
943        match s {
944            Cow::Borrowed(s) => s.into(),
945            Cow::Owned(s) => s.into(),
946        }
947    }
948}
949
950impl<const N: usize> From<&[u16; N]> for JsString {
951    #[inline]
952    fn from(s: &[u16; N]) -> Self {
953        Self::from(&s[..])
954    }
955}
956
957impl Hash for JsString {
958    #[inline]
959    fn hash<H: Hasher>(&self, state: &mut H) {
960        self.as_str().hash(state);
961    }
962}
963
964impl PartialOrd for JsStr<'_> {
965    #[inline]
966    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
967        Some(self.cmp(other))
968    }
969}
970
971impl Ord for JsString {
972    #[inline]
973    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
974        self.as_str().cmp(&other.as_str())
975    }
976}
977
978impl PartialEq for JsString {
979    #[inline]
980    fn eq(&self, other: &Self) -> bool {
981        self.as_str() == other.as_str()
982    }
983}
984
985impl PartialEq<JsString> for [u16] {
986    #[inline]
987    fn eq(&self, other: &JsString) -> bool {
988        if self.len() != other.len() {
989            return false;
990        }
991        for (x, y) in self.iter().copied().zip(other.iter()) {
992            if x != y {
993                return false;
994            }
995        }
996        true
997    }
998}
999
1000impl<const N: usize> PartialEq<JsString> for [u16; N] {
1001    #[inline]
1002    fn eq(&self, other: &JsString) -> bool {
1003        self[..] == *other
1004    }
1005}
1006
1007impl PartialEq<[u16]> for JsString {
1008    #[inline]
1009    fn eq(&self, other: &[u16]) -> bool {
1010        other == self
1011    }
1012}
1013
1014impl<const N: usize> PartialEq<[u16; N]> for JsString {
1015    #[inline]
1016    fn eq(&self, other: &[u16; N]) -> bool {
1017        *self == other[..]
1018    }
1019}
1020
1021impl PartialEq<str> for JsString {
1022    #[inline]
1023    fn eq(&self, other: &str) -> bool {
1024        self.as_str() == other
1025    }
1026}
1027
1028impl PartialEq<&str> for JsString {
1029    #[inline]
1030    fn eq(&self, other: &&str) -> bool {
1031        self.as_str() == *other
1032    }
1033}
1034
1035impl PartialEq<JsString> for str {
1036    #[inline]
1037    fn eq(&self, other: &JsString) -> bool {
1038        other == self
1039    }
1040}
1041
1042impl PartialEq<JsStr<'_>> for JsString {
1043    #[inline]
1044    fn eq(&self, other: &JsStr<'_>) -> bool {
1045        self.as_str() == *other
1046    }
1047}
1048
1049impl PartialEq<JsString> for JsStr<'_> {
1050    #[inline]
1051    fn eq(&self, other: &JsString) -> bool {
1052        other == self
1053    }
1054}
1055
1056impl PartialOrd for JsString {
1057    #[inline]
1058    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
1059        Some(self.cmp(other))
1060    }
1061}
1062
1063impl FromStr for JsString {
1064    type Err = Infallible;
1065
1066    #[inline]
1067    fn from_str(s: &str) -> Result<Self, Self::Err> {
1068        Ok(Self::from(s))
1069    }
1070}