Skip to main content

obeli_sk_boa_string/
lib.rs

1//! A Latin1 or UTF-16 encoded, reference counted, immutable string.
2
3// Required per unsafe code standards to ensure every unsafe usage is properly documented.
4// - `unsafe_op_in_unsafe_fn` will be warn-by-default in edition 2024:
5//   https://github.com/rust-lang/rust/issues/71668#issuecomment-1189396860
6// - `undocumented_unsafe_blocks` and `missing_safety_doc` requires a `Safety:` section in the
7//   comment or doc of the unsafe block or function, respectively.
8#![deny(
9    unsafe_op_in_unsafe_fn,
10    clippy::undocumented_unsafe_blocks,
11    clippy::missing_safety_doc
12)]
13#![allow(clippy::module_name_repetitions)]
14
15mod builder;
16mod code_point;
17mod common;
18mod display;
19mod iter;
20mod str;
21mod r#type;
22mod vtable;
23
24#[cfg(test)]
25mod tests;
26
27use self::iter::Windows;
28use crate::display::{JsStrDisplayEscaped, JsStrDisplayLossy, JsStringDebugInfo};
29use crate::iter::CodePointsIter;
30use crate::r#type::{Latin1, Utf16};
31pub use crate::vtable::StaticString;
32use crate::vtable::{SequenceString, SliceString};
33#[doc(inline)]
34pub use crate::{
35    builder::{CommonJsStringBuilder, Latin1JsStringBuilder, Utf16JsStringBuilder},
36    code_point::CodePoint,
37    common::StaticJsStrings,
38    iter::Iter,
39    str::{JsStr, JsStrVariant},
40};
41use std::marker::PhantomData;
42use std::{borrow::Cow, mem::ManuallyDrop};
43use std::{
44    convert::Infallible,
45    hash::{Hash, Hasher},
46    ptr::{self, NonNull},
47    str::FromStr,
48};
49use vtable::JsStringVTable;
50
/// Aborts string construction when the requested length overflows `usize`.
///
/// Marked `#[cold]` and never inlined so the panic machinery stays out of the
/// hot allocation paths that call it.
///
/// # Panics
///
/// Always panics; this function never returns.
#[cold]
#[inline(never)]
fn alloc_overflow() -> ! {
    panic!("detected overflow during string allocation")
}
54
/// Returns `true` when `c` is whitespace that ECMAScript's `trim` family removes.
///
/// This intentionally differs from Rust's `char::is_whitespace` (`\p{White_Space}`):
/// U+0085 (next line) is *not* trimmable here, while U+FEFF (zero width no-break
/// space) *is*. See <https://tc39.es/ecma262/#sec-white-space> and
/// <https://tc39.es/ecma262/#sec-line-terminators>.
pub(crate) const fn is_trimmable_whitespace(c: char) -> bool {
    match c {
        // Explicitly listed white space code points.
        '\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' => true,
        // Unicode Space_Separator category.
        '\u{1680}' | '\u{2000}'..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' => true,
        // Line terminators.
        '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}
75
/// Returns `true` when the Latin1 byte `c` is whitespace that ECMAScript's `trim`
/// family removes.
///
/// This is the Latin1 subset of the ECMAScript white space and line terminator
/// sets: TAB, LF, VT, FF, CR, SPACE and NO-BREAK SPACE. As in the `char` version,
/// 0x85 (next line) is not considered trimmable even though Rust's own `trim` does.
pub(crate) const fn is_trimmable_whitespace_latin1(c: u8) -> bool {
    // 0x09..=0x0D is the contiguous run TAB, LF, VT, FF, CR.
    matches!(c, 0x09..=0x0D | 0x20 | 0xA0)
}
93
94/// Opaque type of a raw string pointer.
///
/// It only appears as the pointee of the `NonNull<RawJsString>` returned by
/// [`JsString::into_raw`] and accepted by [`JsString::from_raw`].
95#[allow(missing_copy_implementations, missing_debug_implementations)]
96pub struct RawJsString {
97    // Make this non-send, non-sync, invariant and unconstructable.
    // (A raw-pointer `PhantomData` opts out of the `Send`/`Sync` auto traits, and the
    // private field prevents construction from outside this crate.)
98    phantom_data: PhantomData<*mut ()>,
99}
100
101/// Strings can be represented internally by multiple kinds. This is used to identify
102/// the storage kind of string.
///
/// The tag is read from the string's vtable (see `JsString::kind` and
/// `JsString::is_static`). The discriminants are fixed explicitly and the enum is
/// `#[repr(u8)]`.
103#[derive(Debug, Clone, Copy, Eq, PartialEq)]
104#[repr(u8)]
105pub(crate) enum JsStringKind {
106    /// A sequential memory slice of Latin1 bytes. See [`SequenceString`].
107    Latin1Sequence = 0,
108
109    /// A sequential memory slice of UTF-16 code units. See [`SequenceString`].
110    Utf16Sequence = 1,
111
112    /// A slice of an existing string. See [`SliceString`].
113    Slice = 2,
114
115    /// A static string that is valid for `'static` lifetime.
116    Static = 3,
117}
118
119/// A Latin1 or UTF-16 encoded, reference counted, immutable string.
120///
121/// This is pretty similar to a <code>[Rc][std::rc::Rc]\<[\[u16\]][slice]\></code>, but without the
122/// length metadata associated with the `Rc` fat pointer. Instead, the length of every string is
123/// stored on the heap, along with its reference counter and its data.
124///
125/// The string can be Latin1 (stored as one byte per code unit for space efficiency) or UTF-16.
126///
127/// We define some commonly used string constants in an interner. For these strings, we don't allocate
128/// memory on the heap to reduce the overhead of memory allocation and reference counting.
129///
130/// # Internal representation
131///
132/// The `ptr` field always points to a structure whose first field is a `JsStringVTable`.
133/// This enables uniform vtable dispatch for all string operations without branching.
134///
135/// Because we ensure this invariant at every construction, we can directly point to this
136/// type to allow for better optimization (and simpler code).
137#[allow(clippy::module_name_repetitions)]
138pub struct JsString {
139    /// Pointer to the string data. Always points to a struct whose first field is
140    /// `JsStringVTable`.
141    ptr: NonNull<JsStringVTable>,
142}
143
144// `JsString` should always be thin-pointer sized.
145static_assertions::assert_eq_size!(JsString, *const ());
146
147impl<'a> From<&'a JsString> for JsStr<'a> {
148    #[inline]
149    fn from(value: &'a JsString) -> Self {
150        value.as_str()
151    }
152}
153
154impl<'a> IntoIterator for &'a JsString {
155    type Item = u16;
156    type IntoIter = Iter<'a>;
157
158    #[inline]
159    fn into_iter(self) -> Self::IntoIter {
160        self.iter()
161    }
162}
163
164impl JsString {
165    /// Create an iterator over the [`JsString`].
166    #[inline]
167    #[must_use]
168    pub fn iter(&self) -> Iter<'_> {
169        self.as_str().iter()
170    }
171
172    /// Create an iterator over overlapping subslices of length size.
173    #[inline]
174    #[must_use]
175    pub fn windows(&self, size: usize) -> Windows<'_> {
176        self.as_str().windows(size)
177    }
178
179    /// Decodes a [`JsString`] into a [`String`], replacing invalid data with its escaped representation
180    /// in 4 digit hexadecimal.
181    #[inline]
182    #[must_use]
183    pub fn to_std_string_escaped(&self) -> String {
184        self.display_escaped().to_string()
185    }
186
187    /// Decodes a [`JsString`] into a [`String`], replacing invalid data with the
188    /// replacement character U+FFFD.
189    #[inline]
190    #[must_use]
191    pub fn to_std_string_lossy(&self) -> String {
192        self.display_lossy().to_string()
193    }
194
195    /// Decodes a [`JsString`] into a [`String`], returning an error if the string contains unpaired
196    /// surrogates.
197    ///
198    /// # Errors
199    ///
200    /// [`FromUtf16Error`][std::string::FromUtf16Error] if it contains any invalid data.
201    #[inline]
202    pub fn to_std_string(&self) -> Result<String, std::string::FromUtf16Error> {
203        self.as_str().to_std_string()
204    }
205
206    /// Decodes a [`JsString`] into an iterator of [`Result<String, u16>`], returning surrogates as
207    /// errors.
208    #[inline]
209    #[allow(clippy::missing_panics_doc)]
210    pub fn to_std_string_with_surrogates(
211        &self,
212    ) -> impl Iterator<Item = Result<String, u16>> + use<'_> {
213        let mut iter = self.code_points().peekable();
214
215        std::iter::from_fn(move || {
216            let cp = iter.next()?;
217            let char = match cp {
218                CodePoint::Unicode(c) => c,
219                CodePoint::UnpairedSurrogate(surr) => return Some(Err(surr)),
220            };
221
222            let mut string = String::from(char);
223
224            loop {
225                let Some(cp) = iter.peek().and_then(|cp| match cp {
226                    CodePoint::Unicode(c) => Some(*c),
227                    CodePoint::UnpairedSurrogate(_) => None,
228                }) else {
229                    break;
230                };
231
232                string.push(cp);
233
234                iter.next().expect("should exist by the check above");
235            }
236
237            Some(Ok(string))
238        })
239    }
240
241    /// Maps the valid segments of an UTF16 string and leaves the unpaired surrogates unchanged.
242    #[inline]
243    #[must_use]
244    pub fn map_valid_segments<F>(&self, mut f: F) -> Self
245    where
246        F: FnMut(String) -> String,
247    {
248        let mut text = Vec::new();
249
250        for part in self.to_std_string_with_surrogates() {
251            match part {
252                Ok(string) => text.extend(f(string).encode_utf16()),
253                Err(surr) => text.push(surr),
254            }
255        }
256
257        Self::from(&text[..])
258    }
259
260    /// Gets an iterator of all the Unicode codepoints of a [`JsString`].
261    #[inline]
262    #[must_use]
263    pub fn code_points(&self) -> CodePointsIter<'_> {
264        (self.vtable().code_points)(self.ptr)
265    }
266
267    /// Get the variant of this string.
268    #[inline]
269    #[must_use]
270    pub fn variant(&self) -> JsStrVariant<'_> {
271        self.as_str().variant()
272    }
273
274    /// Abstract operation `StringIndexOf ( string, searchValue, fromIndex )`
275    ///
276    /// Note: Instead of returning an isize with `-1` as the "not found" value, we make use of the
277    /// type system and return <code>[Option]\<usize\></code> with [`None`] as the "not found" value.
278    ///
279    /// More information:
280    ///  - [ECMAScript reference][spec]
281    ///
282    /// [spec]: https://tc39.es/ecma262/#sec-stringindexof
283    #[inline]
284    #[must_use]
285    pub fn index_of(&self, search_value: JsStr<'_>, from_index: usize) -> Option<usize> {
286        self.as_str().index_of(search_value, from_index)
287    }
288
289    /// Abstract operation `CodePointAt( string, position )`.
290    ///
291    /// The abstract operation `CodePointAt` takes arguments `string` (a String) and `position` (a
292    /// non-negative integer) and returns a Record with fields `[[CodePoint]]` (a code point),
293    /// `[[CodeUnitCount]]` (a positive integer), and `[[IsUnpairedSurrogate]]` (a Boolean). It
294    /// interprets string as a sequence of UTF-16 encoded code points, as described in 6.1.4, and reads
295    /// from it a single code point starting with the code unit at index `position`.
296    ///
297    /// More information:
298    ///  - [ECMAScript reference][spec]
299    ///
300    /// [spec]: https://tc39.es/ecma262/#sec-codepointat
301    ///
302    /// # Panics
303    ///
304    /// If `position` is smaller than size of string.
305    #[inline]
306    #[must_use]
307    pub fn code_point_at(&self, position: usize) -> CodePoint {
308        self.as_str().code_point_at(position)
309    }
310
311    /// Abstract operation `StringToNumber ( str )`
312    ///
313    /// More information:
314    /// - [ECMAScript reference][spec]
315    ///
316    /// [spec]: https://tc39.es/ecma262/#sec-stringtonumber
317    #[inline]
318    #[must_use]
319    pub fn to_number(&self) -> f64 {
320        self.as_str().to_number()
321    }
322
323    /// Get the length of the [`JsString`].
324    #[inline]
325    #[must_use]
326    pub fn len(&self) -> usize {
327        self.vtable().len
328    }
329
330    /// Return true if the [`JsString`] is empty.
331    #[inline]
332    #[must_use]
333    pub fn is_empty(&self) -> bool {
334        self.len() == 0
335    }
336
337    /// Convert the [`JsString`] into a [`Vec<U16>`].
338    #[inline]
339    #[must_use]
340    pub fn to_vec(&self) -> Vec<u16> {
341        self.as_str().to_vec()
342    }
343
344    /// Check if the [`JsString`] contains a byte.
345    #[inline]
346    #[must_use]
347    pub fn contains(&self, element: u8) -> bool {
348        self.as_str().contains(element)
349    }
350
351    /// Trim whitespace from the start and end of the [`JsString`].
352    #[inline]
353    #[must_use]
354    pub fn trim(&self) -> JsString {
355        // Calculate both bounds directly to avoid intermediate allocations.
356        let (start, end) = match self.variant() {
357            JsStrVariant::Latin1(v) => {
358                let Some(start) = v.iter().position(|c| !is_trimmable_whitespace_latin1(*c)) else {
359                    return StaticJsStrings::EMPTY_STRING;
360                };
361                let end = v
362                    .iter()
363                    .rposition(|c| !is_trimmable_whitespace_latin1(*c))
364                    .unwrap_or(start);
365                (start, end)
366            }
367            JsStrVariant::Utf16(v) => {
368                let Some(start) = v.iter().copied().position(|r| {
369                    !char::from_u32(u32::from(r)).is_some_and(is_trimmable_whitespace)
370                }) else {
371                    return StaticJsStrings::EMPTY_STRING;
372                };
373                let end = v
374                    .iter()
375                    .copied()
376                    .rposition(|r| {
377                        !char::from_u32(u32::from(r)).is_some_and(is_trimmable_whitespace)
378                    })
379                    .unwrap_or(start);
380                (start, end)
381            }
382        };
383
384        // SAFETY: `position(...)` and `rposition(...)` cannot exceed the length of the string.
385        unsafe { Self::slice_unchecked(self, start, end + 1) }
386    }
387
388    /// Trim whitespace from the start of the [`JsString`].
389    #[inline]
390    #[must_use]
391    pub fn trim_start(&self) -> JsString {
392        let Some(start) = (match self.variant() {
393            JsStrVariant::Latin1(v) => v.iter().position(|c| !is_trimmable_whitespace_latin1(*c)),
394            JsStrVariant::Utf16(v) => v
395                .iter()
396                .copied()
397                .position(|r| !char::from_u32(u32::from(r)).is_some_and(is_trimmable_whitespace)),
398        }) else {
399            return StaticJsStrings::EMPTY_STRING;
400        };
401
402        // SAFETY: `position(...)` cannot exceed the length of the string.
403        unsafe { Self::slice_unchecked(self, start, self.len()) }
404    }
405
406    /// Trim whitespace from the end of the [`JsString`].
407    #[inline]
408    #[must_use]
409    pub fn trim_end(&self) -> JsString {
410        let Some(end) = (match self.variant() {
411            JsStrVariant::Latin1(v) => v.iter().rposition(|c| !is_trimmable_whitespace_latin1(*c)),
412            JsStrVariant::Utf16(v) => v
413                .iter()
414                .copied()
415                .rposition(|r| !char::from_u32(u32::from(r)).is_some_and(is_trimmable_whitespace)),
416        }) else {
417            return StaticJsStrings::EMPTY_STRING;
418        };
419
420        // SAFETY: `rposition(...)` cannot exceed the length of the string. `end` is the first
421        //         character that is not trimmable, therefore we need to add 1 to it.
422        unsafe { Self::slice_unchecked(self, 0, end + 1) }
423    }
424
425    /// Returns true if needle is a prefix of the [`JsStr`].
426    #[inline]
427    #[must_use]
428    // We check the size, so this should never panic.
429    #[allow(clippy::missing_panics_doc)]
430    pub fn starts_with(&self, needle: JsStr<'_>) -> bool {
431        self.as_str().starts_with(needle)
432    }
433
434    /// Returns `true` if `needle` is a suffix of the [`JsStr`].
435    #[inline]
436    #[must_use]
437    // We check the size, so this should never panic.
438    #[allow(clippy::missing_panics_doc)]
439    pub fn ends_with(&self, needle: JsStr<'_>) -> bool {
440        self.as_str().starts_with(needle)
441    }
442
443    /// Get the `u16` code unit at index. This does not parse any characters if there
444    /// are pairs, it is simply the index of the `u16` elements.
445    #[inline]
446    #[must_use]
447    pub fn code_unit_at(&self, index: usize) -> Option<u16> {
448        self.as_str().get(index)
449    }
450
451    /// Get the element at the given index, or [`None`] if the index is out of range.
452    #[inline]
453    #[must_use]
454    pub fn get<I>(&self, index: I) -> Option<JsString>
455    where
456        I: JsStringSliceIndex,
457    {
458        index.get(self)
459    }
460
461    /// Get the element at the given index, or panic.
462    ///
463    /// # Panics
464    /// If the index returns `None`, this will panic.
465    #[inline]
466    #[must_use]
467    pub fn get_expect<I>(&self, index: I) -> JsString
468    where
469        I: JsStringSliceIndex,
470    {
471        index.get(self).expect("Unexpected get()")
472    }
473
474    /// Gets a displayable escaped string. This may be faster and has fewer
475    /// allocations than `format!("{}", str.to_string_escaped())` when
476    /// displaying.
477    #[inline]
478    #[must_use]
479    pub fn display_escaped(&self) -> JsStrDisplayEscaped<'_> {
480        JsStrDisplayEscaped::from(self)
481    }
482
483    /// Gets a displayable lossy string. This may be faster and has fewer
484    /// allocations than `format!("{}", str.to_string_lossy())` when displaying.
485    #[inline]
486    #[must_use]
487    pub fn display_lossy(&self) -> JsStrDisplayLossy<'_> {
488        self.as_str().display_lossy()
489    }
490
491    /// Get a debug displayable info and metadata for this string.
492    #[inline]
493    #[must_use]
494    pub fn debug_info(&self) -> JsStringDebugInfo<'_> {
495        self.into()
496    }
497
498    /// Consumes the [`JsString`], returning the internal pointer.
499    ///
500    /// To avoid a memory leak the pointer must be converted back to a `JsString` using
501    /// [`JsString::from_raw`].
502    #[inline]
503    #[must_use]
504    pub fn into_raw(self) -> NonNull<RawJsString> {
505        ManuallyDrop::new(self).ptr.cast()
506    }
507
508    /// Constructs a `JsString` from the internal pointer.
509    ///
510    /// The raw pointer must have been previously returned by a call to
511    /// [`JsString::into_raw`].
512    ///
513    /// # Safety
514    ///
515    /// This function is unsafe because improper use may lead to memory unsafety,
516    /// even if the returned `JsString` is never accessed.
517    #[inline]
518    #[must_use]
519    pub const unsafe fn from_raw(ptr: NonNull<RawJsString>) -> Self {
520        Self { ptr: ptr.cast() }
521    }
522
523    /// Constructs a `JsString` from a reference to a `VTable`.
524    ///
525    /// # Safety
526    ///
527    /// This function is unsafe because improper use may lead to memory unsafety,
528    /// even if the returned `JsString` is never accessed.
529    #[inline]
530    #[must_use]
531    pub(crate) const unsafe fn from_ptr(ptr: NonNull<JsStringVTable>) -> Self {
532        Self { ptr }
533    }
534}
535
536// `&JsStr<'static>` must always be aligned so it can be tagged.
537static_assertions::const_assert!(align_of::<*const JsStr<'static>>() >= 2);
538
539/// Dealing with inner types.
540impl JsString {
541    /// Check if this is a static string.
542    #[inline]
543    #[must_use]
544    pub fn is_static(&self) -> bool {
545        // Check the vtable kind tag
546        self.vtable().kind == JsStringKind::Static
547    }
548
549    /// Get the vtable for this string.
550    #[inline]
551    #[must_use]
552    const fn vtable(&self) -> &JsStringVTable {
553        // SAFETY: All JsString variants have vtable as the first field (embedded directly).
554        unsafe { self.ptr.as_ref() }
555    }
556
557    /// Create a [`JsString`] from a [`StaticString`] instance. This is assumed that the
558    /// static string referenced is available for the duration of the `JsString` instance
559    /// returned.
560    #[inline]
561    #[must_use]
562    pub const fn from_static(str: &'static StaticString) -> Self {
563        Self {
564            ptr: NonNull::from_ref(str).cast(),
565        }
566    }
567
568    /// Create a [`JsString`] from an existing `JsString` and start, end
569    /// range. `end` is 1 past the last character (or `== data.len()`
570    /// for the last character).
571    ///
572    /// # Safety
573    /// It is the responsibility of the caller to ensure:
574    ///   - `start` <= `end`. If `start` == `end`, the string is empty.
575    ///   - `end` <= `data.len()`.
576    #[inline]
577    #[must_use]
578    pub unsafe fn slice_unchecked(data: &JsString, start: usize, end: usize) -> Self {
579        // Safety: invariant stated by this whole function.
580        let slice = Box::new(unsafe { SliceString::new(data, start, end) });
581
582        Self {
583            ptr: NonNull::from(Box::leak(slice)).cast(),
584        }
585    }
586
587    /// Create a [`JsString`] from an existing `JsString` and start, end
588    /// range. Returns None if the start/end is invalid.
589    #[inline]
590    #[must_use]
591    pub fn slice(&self, p1: usize, mut p2: usize) -> JsString {
592        if p2 > self.len() {
593            p2 = self.len();
594        }
595        if p1 >= p2 {
596            StaticJsStrings::EMPTY_STRING
597        } else {
598            // SAFETY: We just checked the conditions.
599            unsafe { Self::slice_unchecked(self, p1, p2) }
600        }
601    }
602
603    /// Get the kind of this string (for debugging/introspection).
604    #[inline]
605    #[must_use]
606    pub(crate) fn kind(&self) -> JsStringKind {
607        self.vtable().kind
608    }
609
610    /// Get the inner pointer as a reference of type T.
611    ///
612    /// # Safety
613    /// This should only be used when the inner type has been validated via `kind()`.
614    /// Using an unvalidated inner type is undefined behaviour.
615    #[inline]
616    pub(crate) unsafe fn as_inner<T>(&self) -> &T {
617        // SAFETY: Caller must ensure the type matches.
618        unsafe { self.ptr.cast::<T>().as_ref() }
619    }
620}
621
622impl JsString {
623    /// Obtains the underlying [`&[u16]`][slice] slice of a [`JsString`]
624    #[inline]
625    #[must_use]
626    pub fn as_str(&self) -> JsStr<'_> {
627        (self.vtable().as_str)(self.ptr)
628    }
629
630    /// Creates a new [`JsString`] from the concatenation of `x` and `y`.
631    #[inline]
632    #[must_use]
633    pub fn concat(x: JsStr<'_>, y: JsStr<'_>) -> Self {
634        Self::concat_array(&[x, y])
635    }
636
637    /// Creates a new [`JsString`] from the concatenation of every element of
638    /// `strings`.
    ///
    /// The result is Latin1 only if every input is Latin1; otherwise it is UTF-16 and
    /// Latin1 inputs are widened byte by byte. If the concatenation matches an entry
    /// of [`StaticJsStrings`], that entry is returned instead of the new allocation.
    ///
    /// # Panics
    ///
    /// If the sum of the input lengths overflows `usize`.
639    #[inline]
640    #[must_use]
641    pub fn concat_array(strings: &[JsStr<'_>]) -> Self {
        // First pass: total length (overflow-checked) and the common encoding.
642        let mut latin1_encoding = true;
643        let mut full_count = 0usize;
644        for string in strings {
645            let Some(sum) = full_count.checked_add(string.len()) else {
646                alloc_overflow()
647            };
648            if !string.is_latin1() {
649                latin1_encoding = false;
650            }
651            full_count = sum;
652        }
653
        // Allocate the destination and remember where its data starts.
        // NOTE(review): the offset is `size_of::<SequenceString<_>>()`, whereas
        // `from_slice_skip_interning` uses the address of the `data` field. The two agree
        // only if the struct has no trailing padding after `data` - confirm.
654        let (ptr, data_offset) = if latin1_encoding {
655            let p = SequenceString::<Latin1>::allocate(full_count);
656            (p.cast::<u8>(), size_of::<SequenceString<Latin1>>())
657        } else {
658            let p = SequenceString::<Utf16>::allocate(full_count);
659            (p.cast::<u8>(), size_of::<SequenceString<Utf16>>())
660        };
661
662        let string = {
663            // SAFETY: `SequenceString::allocate` guarantees that `ptr` is a valid pointer to a sequence string.
664            let mut data = unsafe {
665                let seq_ptr = ptr.as_ptr();
666                seq_ptr.add(data_offset)
667            };
            // Second pass: copy each input and advance the byte cursor `data` past it.
668            for &string in strings {
669                // SAFETY:
670                // The sum of all `count` for each `string` equals `full_count`, and since we're
671                // iteratively writing each of them to `data`, `copy_non_overlapping` always stays
672                // in-bounds for `count` reads of each string and `full_count` writes to `data`.
673                //
674                // Each `string` must be properly aligned to be a valid slice, and `data` must be
675                // properly aligned by `SequenceString::allocate`.
676                //
677                // `SequenceString::allocate` must return a valid pointer to newly allocated memory,
678                // meaning `ptr` and all `string`s should never overlap.
679                unsafe {
680                    // NOTE: The alignment is checked when we allocate the array.
681                    #[allow(clippy::cast_ptr_alignment)]
682                    match (latin1_encoding, string.variant()) {
683                        (true, JsStrVariant::Latin1(s)) => {
684                            let count = s.len();
685                            ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u8>(), count);
686                            data = data.cast::<u8>().add(count).cast::<u8>();
687                        }
                        // Latin1 input into a UTF-16 destination: widen each byte to `u16`.
688                        (false, JsStrVariant::Latin1(s)) => {
689                            let count = s.len();
690                            for (i, byte) in s.iter().enumerate() {
691                                *data.cast::<u16>().add(i) = u16::from(*byte);
692                            }
693                            data = data.cast::<u16>().add(count).cast::<u8>();
694                        }
695                        (false, JsStrVariant::Utf16(s)) => {
696                            let count = s.len();
697                            ptr::copy_nonoverlapping(s.as_ptr(), data.cast::<u16>(), count);
698                            data = data.cast::<u16>().add(count).cast::<u8>();
699                        }
                        // `latin1_encoding` is cleared as soon as any input is not Latin1.
700                        (true, JsStrVariant::Utf16(_)) => {
701                            unreachable!("Already checked that it's latin1 encoding")
702                        }
703                    }
704                }
705            }
706
707            Self { ptr: ptr.cast() }
708        };
709
        // Prefer the interned static string when the result matches one.
710        StaticJsStrings::get_string(&string.as_str()).unwrap_or(string)
711    }
712
713    /// Creates a new [`JsString`] from `data`, without checking if the string is in the interner.
    ///
    /// Always allocates a new `SequenceString` with the same encoding as `string` and
    /// copies the contents into it.
714    fn from_slice_skip_interning(string: JsStr<'_>) -> Self {
715        let count = string.len();
716
717        // SAFETY:
718        // - We read `count = data.len()` elements from `data`, which is within the bounds of the slice.
719        // - `SequenceString::allocate` must allocate at least `count` elements, which allows us to
720        //   safely write at least `count` elements.
721        // - `SequenceString::allocate` should already take care of the alignment of `ptr`, and `data`
722        //   must be aligned to be a valid slice.
723        // - `SequenceString::allocate` must return a valid pointer to newly allocated memory, meaning
724        //   `ptr` and `data` should never overlap.
725        unsafe {
726            // NOTE: The alignment is checked when we allocate the array.
727            #[allow(clippy::cast_ptr_alignment)]
728            match string.variant() {
729                JsStrVariant::Latin1(s) => {
730                    let ptr = SequenceString::<Latin1>::allocate(count);
                    // `&raw mut` takes the address of the `data` field without creating a reference.
731                    let data = (&raw mut (*ptr.as_ptr()).data)
732                        .cast::<<Latin1 as r#type::StringType>::Byte>();
733                    ptr::copy_nonoverlapping(s.as_ptr(), data, count);
734                    Self { ptr: ptr.cast() }
735                }
736                JsStrVariant::Utf16(s) => {
737                    let ptr = SequenceString::<Utf16>::allocate(count);
738                    let data = (&raw mut (*ptr.as_ptr()).data)
739                        .cast::<<Utf16 as r#type::StringType>::Byte>();
740                    ptr::copy_nonoverlapping(s.as_ptr(), data, count);
741                    Self { ptr: ptr.cast() }
742                }
743            }
744        }
745    }
746
747    /// Creates a new [`JsString`] from `data`.
748    fn from_js_str(string: JsStr<'_>) -> Self {
749        if let Some(s) = StaticJsStrings::get_string(&string) {
750            return s;
751        }
752        Self::from_slice_skip_interning(string)
753    }
754
755    /// Gets the number of `JsString`s which point to this allocation.
756    #[inline]
757    #[must_use]
758    pub fn refcount(&self) -> Option<usize> {
759        (self.vtable().refcount)(self.ptr)
760    }
761}
762
763impl Clone for JsString {
764    #[inline]
765    fn clone(&self) -> Self {
766        (self.vtable().clone)(self.ptr)
767    }
768}
769
770impl Default for JsString {
771    #[inline]
772    fn default() -> Self {
773        StaticJsStrings::EMPTY_STRING
774    }
775}
776
777impl Drop for JsString {
778    #[inline]
779    fn drop(&mut self) {
780        (self.vtable().drop)(self.ptr);
781    }
782}
783
784impl std::fmt::Debug for JsString {
785    #[inline]
786    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
787        f.debug_tuple("JsString")
788            .field(&self.display_escaped().to_string())
789            .finish()
790    }
791}
792
// Marker impl: equality is defined by `PartialEq for JsString`, which compares
// the `as_str()` views of both strings.
793impl Eq for JsString {}
794
// Generates `impl From<$ty> for JsString` for numeric types.
//
// Each `$module => $ty, ...` group names the module whose `Buffer` formats those types.
// The formatted text is passed on as Latin1 bytes and always allocates a new string:
// the interner lookup is skipped (`from_slice_skip_interning`).
795macro_rules! impl_from_number_for_js_string {
796    ($($module: ident => $($ty:ty),+)+) => {
797        $(
798            $(
799                impl From<$ty> for JsString {
800                    #[inline]
801                    fn from(value: $ty) -> Self {
802                        JsString::from_slice_skip_interning(JsStr::latin1(
803                            $module::Buffer::new().format(value).as_bytes(),
804                        ))
805                    }
806                }
807            )+
808        )+
809    };
810}
811
// Integers are formatted with `itoa`, floats with `ryu_js`.
812impl_from_number_for_js_string!(
813    itoa => i8, i16, i32, i64, i128, u8, u16, u32, u64, u128, isize, usize
814    ryu_js => f32, f64
815);
816
817impl From<&[u16]> for JsString {
818    #[inline]
819    fn from(s: &[u16]) -> Self {
820        JsString::from_js_str(JsStr::utf16(s))
821    }
822}
823
824impl From<&str> for JsString {
825    #[inline]
826    fn from(s: &str) -> Self {
827        if s.is_ascii() {
828            let js_str = JsStr::latin1(s.as_bytes());
829            return StaticJsStrings::get_string(&js_str)
830                .unwrap_or_else(|| JsString::from_slice_skip_interning(js_str));
831        }
832        // Non-ASCII but still Latin1-encodable (U+0080..=U+00FF): chars map 1-to-1 to u8.
833        if s.chars().all(|c| c as u32 <= 0xFF) {
834            let bytes: Vec<u8> = s.chars().map(|c| c as u8).collect();
835            let js_str = JsStr::latin1(&bytes);
836            return StaticJsStrings::get_string(&js_str)
837                .unwrap_or_else(|| JsString::from_slice_skip_interning(js_str));
838        }
839        let s = s.encode_utf16().collect::<Vec<_>>();
840        JsString::from_slice_skip_interning(JsStr::utf16(&s[..]))
841    }
842}
843
844impl From<JsStr<'_>> for JsString {
845    #[inline]
846    fn from(value: JsStr<'_>) -> Self {
847        StaticJsStrings::get_string(&value)
848            .unwrap_or_else(|| JsString::from_slice_skip_interning(value))
849    }
850}
851
852impl From<&[JsString]> for JsString {
853    #[inline]
854    fn from(value: &[JsString]) -> Self {
855        Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])
856    }
857}
858
859impl<const N: usize> From<&[JsString; N]> for JsString {
860    #[inline]
861    fn from(value: &[JsString; N]) -> Self {
862        Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])
863    }
864}
865
866impl From<String> for JsString {
867    #[inline]
868    fn from(s: String) -> Self {
869        Self::from(s.as_str())
870    }
871}
872
873impl<'a> From<Cow<'a, str>> for JsString {
874    #[inline]
875    fn from(s: Cow<'a, str>) -> Self {
876        match s {
877            Cow::Borrowed(s) => s.into(),
878            Cow::Owned(s) => s.into(),
879        }
880    }
881}
882
883impl<const N: usize> From<&[u16; N]> for JsString {
884    #[inline]
885    fn from(s: &[u16; N]) -> Self {
886        Self::from(&s[..])
887    }
888}
889
890impl Hash for JsString {
891    #[inline]
892    fn hash<H: Hasher>(&self, state: &mut H) {
893        self.as_str().hash(state);
894    }
895}
896
897impl PartialOrd for JsStr<'_> {
898    #[inline]
899    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
900        Some(self.cmp(other))
901    }
902}
903
904impl Ord for JsString {
905    #[inline]
906    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
907        self.as_str().cmp(&other.as_str())
908    }
909}
910
911impl PartialEq for JsString {
912    #[inline]
913    fn eq(&self, other: &Self) -> bool {
914        self.as_str() == other.as_str()
915    }
916}
917
918impl PartialEq<JsString> for [u16] {
919    #[inline]
920    fn eq(&self, other: &JsString) -> bool {
921        if self.len() != other.len() {
922            return false;
923        }
924        for (x, y) in self.iter().copied().zip(other.iter()) {
925            if x != y {
926                return false;
927            }
928        }
929        true
930    }
931}
932
933impl<const N: usize> PartialEq<JsString> for [u16; N] {
934    #[inline]
935    fn eq(&self, other: &JsString) -> bool {
936        self[..] == *other
937    }
938}
939
940impl PartialEq<[u16]> for JsString {
941    #[inline]
942    fn eq(&self, other: &[u16]) -> bool {
943        other == self
944    }
945}
946
947impl<const N: usize> PartialEq<[u16; N]> for JsString {
948    #[inline]
949    fn eq(&self, other: &[u16; N]) -> bool {
950        *self == other[..]
951    }
952}
953
954impl PartialEq<str> for JsString {
955    #[inline]
956    fn eq(&self, other: &str) -> bool {
957        self.as_str() == other
958    }
959}
960
961impl PartialEq<&str> for JsString {
962    #[inline]
963    fn eq(&self, other: &&str) -> bool {
964        self.as_str() == *other
965    }
966}
967
968impl PartialEq<JsString> for str {
969    #[inline]
970    fn eq(&self, other: &JsString) -> bool {
971        other == self
972    }
973}
974
975impl PartialEq<JsStr<'_>> for JsString {
976    #[inline]
977    fn eq(&self, other: &JsStr<'_>) -> bool {
978        self.as_str() == *other
979    }
980}
981
982impl PartialEq<JsString> for JsStr<'_> {
983    #[inline]
984    fn eq(&self, other: &JsString) -> bool {
985        other == self
986    }
987}
988
989impl PartialOrd for JsString {
990    #[inline]
991    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
992        Some(self.cmp(other))
993    }
994}
995
996impl FromStr for JsString {
997    type Err = Infallible;
998
999    #[inline]
1000    fn from_str(s: &str) -> Result<Self, Self::Err> {
1001        Ok(Self::from(s))
1002    }
1003}
1004
/// Similar to [`std::ops::RangeBounds`] but custom implemented for getting direct indices.
///
/// A value implementing this trait describes which part of a [`JsString`] to extract and
/// produces that part as a new [`JsString`].
// TODO: remove [`str::JsSliceIndex`] and rename this when `JsStr` is no more.
pub trait JsStringSliceIndex {
    /// Get the substring (or `None` if outside the string).
    ///
    /// The index value is consumed; `str` is only borrowed.
    fn get(self, str: &JsString) -> Option<JsString>;
}
1011
/// Resolves `range` against a string of length `len` into a half-open `(start, end)` pair.
///
/// Returns `None` when the range does not fit, i.e. unless `start <= end <= len`. A bound
/// that cannot be converted to its half-open form without overflowing `usize` (for example
/// an inclusive end of `usize::MAX`) is also reported as `None` instead of wrapping around
/// or panicking.
fn resolve_slice_bounds<R>(range: &R, len: usize) -> Option<(usize, usize)>
where
    R: std::ops::RangeBounds<usize>,
{
    let start = match range.start_bound() {
        std::ops::Bound::Included(start) => *start,
        // An excluded start of `usize::MAX` has no valid successor.
        std::ops::Bound::Excluded(start) => start.checked_add(1)?,
        std::ops::Bound::Unbounded => 0,
    };

    let end = match range.end_bound() {
        // `..=usize::MAX` would overflow here; no string can be that long, so it is out of range.
        std::ops::Bound::Included(end) => end.checked_add(1)?,
        std::ops::Bound::Excluded(end) => *end,
        std::ops::Bound::Unbounded => len,
    };

    if end > len || start > end {
        None
    } else {
        Some((start, end))
    }
}

/// Implements [`JsStringSliceIndex`] for each of the given range types.
///
/// Every generated `get` resolves the range with `resolve_slice_bounds` and, when the range
/// fits inside the string, returns the corresponding slice.
macro_rules! impl_js_string_slice_index {
    ($($type:ty),+ $(,)?) => {
        $(
        impl JsStringSliceIndex for $type {
            fn get(self, str: &JsString) -> Option<JsString> {
                let (start, end) = resolve_slice_bounds(&self, str.len())?;

                // SAFETY: `resolve_slice_bounds` only returns `Some` when
                // `start <= end <= str.len()`, so the indices were just checked.
                Some(unsafe { JsString::slice_unchecked(str, start, end) })
            }
        }
        )+
    };
}
1040
// Implement `JsStringSliceIndex` for the standard `usize` range types listed below.
impl_js_string_slice_index!(
    std::ops::Range<usize>,
    std::ops::RangeInclusive<usize>,
    std::ops::RangeTo<usize>,
    std::ops::RangeToInclusive<usize>,
    std::ops::RangeFrom<usize>,
    std::ops::RangeFull,
);