boa/
string.rs

1use crate::{
2    builtins::string::is_trimmable_whitespace,
3    gc::{empty_trace, Finalize, Trace},
4};
5use rustc_hash::FxHashSet;
6use std::{
7    alloc::{alloc, dealloc, Layout},
8    borrow::Borrow,
9    cell::Cell,
10    hash::{Hash, Hasher},
11    marker::PhantomData,
12    ops::Deref,
13    ptr::{copy_nonoverlapping, NonNull},
14};
15
/// Commonly used strings, mostly the names of built-ins and their properties.
///
/// Every entry is turned into a pre-allocated `JsString` stored in the
/// `CONSTANTS` set, so creating one of these strings clones the shared
/// instance instead of allocating a new one. Some names appear in more than
/// one section; the set de-duplicates them.
const CONSTANTS_ARRAY: [&str; 127] = [
    // Empty string
    "",
    // Misc
    ",",
    ":",
    // Generic use
    "name",
    "length",
    "arguments",
    "prototype",
    "constructor",
    // typeof
    "null",
    "undefined",
    "number",
    "string",
    "symbol",
    "bigint",
    "object",
    "function",
    // Property descriptor
    "value",
    "get",
    "set",
    "writable",
    "enumerable",
    "configurable",
    // Object object
    "Object",
    "assign",
    "create",
    "toString",
    "valueOf",
    "is",
    "seal",
    "isSealed",
    "freeze",
    "isFrozen",
    "keys",
    "values",
    "entries",
    // Function object
    "Function",
    "apply",
    "bind",
    "call",
    // Array object
    "Array",
    "from",
    "isArray",
    "of",
    "get [Symbol.species]",
    "copyWithin",
    "entries",
    "every",
    "fill",
    "filter",
    "find",
    "findIndex",
    "flat",
    "flatMap",
    "forEach",
    "includes",
    "indexOf",
    "join",
    "map",
    "reduce",
    "reduceRight",
    "reverse",
    "shift",
    "slice",
    "some",
    "sort",
    "unshift",
    "push",
    "pop",
    // String object
    "String",
    "charAt",
    "charCodeAt",
    "concat",
    "endsWith",
    "includes",
    "indexOf",
    "lastIndexOf",
    "match",
    "matchAll",
    "normalize",
    "padEnd",
    "padStart",
    "repeat",
    "replace",
    "replaceAll",
    "search",
    "slice",
    "split",
    "startsWith",
    "substring",
    "toLowerCase",
    "toUpperCase",
    "trim",
    "trimEnd",
    "trimStart",
    // Number object
    "Number",
    // Boolean object
    "Boolean",
    // RegExp object
    "RegExp",
    "exec",
    "test",
    "flags",
    "index",
    "lastIndex",
    // Symbol object
    "Symbol",
    "for",
    "keyFor",
    "description",
    "[Symbol.toPrimitive]",
    "",
    // Map object
    "Map",
    "clear",
    "delete",
    "get",
    "has",
    "set",
    "size",
    // Set object
    "Set",
    // Reflect object
    "Reflect",
    // Error objects
    "Error",
    "TypeError",
    "RangeError",
    "SyntaxError",
    "ReferenceError",
    "EvalError",
    "URIError",
    "message",
    // Date object
    "Date",
    "toJSON",
];
163
164const MAX_CONSTANT_STRING_LENGTH: usize = {
165    let mut max = 0;
166    let mut i = 0;
167    while i < CONSTANTS_ARRAY.len() {
168        let len = CONSTANTS_ARRAY[i].len();
169        if len > max {
170            max = len;
171        }
172        i += 1;
173    }
174    max
175};
176
177thread_local! {
178    static CONSTANTS: FxHashSet<JsString> = {
179        let mut constants = FxHashSet::default();
180
181        for s in CONSTANTS_ARRAY.iter() {
182            let s = JsString {
183                inner: Inner::new(s),
184                _marker: PhantomData,
185            };
186            constants.insert(s);
187        }
188
189        constants
190    };
191}
192
/// The inner representation of a [`JsString`].
///
/// This header is followed, inside the same heap allocation, by `len` bytes of
/// string data.
#[repr(C)]
struct Inner {
    /// The utf8 length, the number of bytes.
    len: usize,

    /// The number of references to the string.
    ///
    /// When this reaches `0` the string is deallocated.
    refcount: Cell<usize>,

    /// An empty array which is used to get the offset of string data.
    data: [u8; 0],
}

impl Inner {
    /// Layout of an `Inner` header followed by `len` bytes of string data,
    /// together with the byte offset at which the string data starts.
    ///
    /// Panics if the combined size cannot be represented as a `Layout`.
    #[inline]
    fn layout_for(len: usize) -> (Layout, usize) {
        Layout::new::<Inner>()
            .extend(Layout::array::<u8>(len).unwrap())
            .unwrap()
    }

    /// Allocate an `Inner` with room for `len` bytes of string data and
    /// initialize its header (`refcount` starts at `1`).
    ///
    /// Returns the header pointer and a pointer to the still uninitialized
    /// string data. Allocation failure is reported through
    /// `std::alloc::handle_alloc_error` instead of writing through a null
    /// pointer.
    #[inline]
    fn allocate(len: usize) -> (NonNull<Inner>, *mut u8) {
        let (layout, offset) = Self::layout_for(len);

        // Safety: the layout always contains the header, so its size is non-zero.
        let raw = unsafe { alloc(layout) };
        if raw.is_null() {
            std::alloc::handle_alloc_error(layout);
        }
        let inner = raw as *mut Inner;

        // Safety: `raw` is non-null and was allocated with `layout`, which is
        // suitably aligned for `Inner` and large enough for the header plus
        // `len` bytes starting at `offset`.
        unsafe {
            // Write the first part, the Inner.
            inner.write(Inner {
                len,
                refcount: Cell::new(1),
                data: [0; 0],
            });

            // Derive the data pointer from the allocation's base pointer so
            // that it may be used to access the whole string data.
            let data = raw.add(offset);

            debug_assert!(std::ptr::eq(data, (*inner).data.as_mut_ptr()));

            (NonNull::new_unchecked(inner), data)
        }
    }

    /// Create a new `Inner` from `&str`.
    #[inline]
    fn new(s: &str) -> NonNull<Self> {
        let (inner, data) = Self::allocate(s.len());

        // Safety: `data` points to `s.len()` writable bytes of a fresh
        // allocation, which cannot overlap `s`.
        unsafe {
            copy_nonoverlapping(s.as_ptr(), data, s.len());
        }

        inner
    }

    /// Concatenate array of strings.
    ///
    /// Panics if the combined length of `strings` does not fit in a `usize`.
    #[inline]
    fn concat_array(strings: &[&str]) -> NonNull<Inner> {
        // A wrapped sum would allocate a buffer smaller than the data that is
        // copied into it below, so overflow has to be a hard error.
        let total_string_size = strings
            .iter()
            .try_fold(0usize, |total, string| total.checked_add(string.len()))
            .expect("combined string length overflows usize");

        let (inner, data) = Self::allocate(total_string_size);

        // Copy every string, one after the other, into the data area.
        let mut written = 0;
        for string in strings {
            // Safety: `written + string.len()` never exceeds
            // `total_string_size`, the number of bytes available behind `data`.
            unsafe {
                copy_nonoverlapping(string.as_ptr(), data.add(written), string.len());
            }
            written += string.len();
        }

        inner
    }

    /// Deallocate inner type with string data.
    ///
    /// # Safety
    ///
    /// `x` must have been returned by [`Inner::new`] or
    /// [`Inner::concat_array`], its `len` field must be unchanged, and the
    /// allocation must not be used again after this call.
    #[inline]
    unsafe fn dealloc(x: NonNull<Inner>) {
        let len = (*x.as_ptr()).len;

        // Recompute the layout the allocation was created with.
        let (layout, _offset) = Self::layout_for(len);

        dealloc(x.as_ptr().cast::<u8>(), layout);
    }
}
301
/// This represents a JavaScript primitive string.
///
/// This is similar to `Rc<str>`, but unlike `Rc<str>`, which keeps the length
/// next to the pointer to the data (a so-called fat pointer), a `JsString`
/// stores both the length and the data on the heap and keeps only a non-null
/// pointer, so its size is the size of a pointer.
pub struct JsString {
    /// Pointer to the heap allocation holding the length, the reference count
    /// and the string data.
    inner: NonNull<Inner>,
    /// Zero-sized marker naming `Rc<str>`.
    // NOTE(review): presumably here so that `JsString` is treated like an
    // `Rc<str>` by auto traits and the drop checker; confirm the intent.
    _marker: PhantomData<std::rc::Rc<str>>,
}
312
313impl Default for JsString {
314    #[inline]
315    fn default() -> Self {
316        Self::new("")
317    }
318}
319
320impl JsString {
321    /// Create an empty string, same as calling default.
322    #[inline]
323    pub fn empty() -> Self {
324        JsString::default()
325    }
326
327    /// Create a new JavaScript string.
328    #[inline]
329    pub fn new<S: AsRef<str>>(s: S) -> Self {
330        let s = s.as_ref();
331
332        if s.len() <= MAX_CONSTANT_STRING_LENGTH {
333            if let Some(constant) = CONSTANTS.with(|c| c.get(s).cloned()) {
334                return constant;
335            }
336        }
337
338        Self {
339            inner: Inner::new(s),
340            _marker: PhantomData,
341        }
342    }
343
344    /// Concatenate two string.
345    pub fn concat<T, U>(x: T, y: U) -> JsString
346    where
347        T: AsRef<str>,
348        U: AsRef<str>,
349    {
350        let x = x.as_ref();
351        let y = y.as_ref();
352
353        let this = Self {
354            inner: Inner::concat_array(&[x, y]),
355            _marker: PhantomData,
356        };
357
358        if this.len() <= MAX_CONSTANT_STRING_LENGTH {
359            if let Some(constant) = CONSTANTS.with(|c| c.get(&this).cloned()) {
360                return constant;
361            }
362        }
363
364        this
365    }
366
367    /// Concatenate array of string.
368    pub fn concat_array(strings: &[&str]) -> JsString {
369        let this = Self {
370            inner: Inner::concat_array(strings),
371            _marker: PhantomData,
372        };
373
374        if this.len() <= MAX_CONSTANT_STRING_LENGTH {
375            if let Some(constant) = CONSTANTS.with(|c| c.get(&this).cloned()) {
376                return constant;
377            }
378        }
379
380        this
381    }
382
383    /// Return the inner representation.
384    #[inline]
385    fn inner(&self) -> &Inner {
386        unsafe { self.inner.as_ref() }
387    }
388
389    /// Return the JavaScript string as a rust `&str`.
390    #[inline]
391    pub fn as_str(&self) -> &str {
392        let inner = self.inner();
393
394        unsafe {
395            let slice = std::slice::from_raw_parts(inner.data.as_ptr(), inner.len);
396            std::str::from_utf8_unchecked(slice)
397        }
398    }
399
400    /// Gets the number of `JsString`s which point to this allocation.
401    #[inline]
402    pub fn refcount(this: &Self) -> usize {
403        this.inner().refcount.get()
404    }
405
406    /// Returns `true` if the two `JsString`s point to the same allocation (in a vein similar to [`ptr::eq`]).
407    ///
408    /// [`ptr::eq`]: std::ptr::eq
409    #[inline]
410    pub fn ptr_eq(x: &Self, y: &Self) -> bool {
411        x.inner == y.inner
412    }
413
414    /// `6.1.4.1 StringIndexOf ( string, searchValue, fromIndex )`
415    ///
416    /// Note: Instead of returning an isize with `-1` as the "not found" value,
417    /// We make use of the type system and return Option<usize> with None as the "not found" value.
418    ///
419    /// More information:
420    ///  - [ECMAScript reference][spec]
421    ///
422    /// [spec]: https://tc39.es/ecma262/#sec-stringindexof
423    pub(crate) fn index_of(&self, search_value: &Self, from_index: usize) -> Option<usize> {
424        // 1. Assert: Type(string) is String.
425        // 2. Assert: Type(searchValue) is String.
426        // 3. Assert: fromIndex is a non-negative integer.
427
428        // 4. Let len be the length of string.
429        let len = self.encode_utf16().count();
430
431        // 5. If searchValue is the empty String and fromIndex ≤ len, return fromIndex.
432        if search_value.is_empty() && from_index <= len {
433            return Some(from_index);
434        }
435
436        // 6. Let searchLen be the length of searchValue.
437        let search_len = search_value.encode_utf16().count();
438
439        // 7. For each integer i starting with fromIndex such that i ≤ len - searchLen, in ascending order, do
440        for i in from_index..=len {
441            if i as isize > (len as isize - search_len as isize) {
442                break;
443            }
444
445            // a. Let candidate be the substring of string from i to i + searchLen.
446            let candidate = String::from_utf16_lossy(
447                &self
448                    .encode_utf16()
449                    .skip(i)
450                    .take(search_len)
451                    .collect::<Vec<u16>>(),
452            );
453
454            // b. If candidate is the same sequence of code units as searchValue, return i.
455            if candidate == search_value.as_str() {
456                return Some(i);
457            }
458        }
459
460        // 8. Return -1.
461        None
462    }
463
464    pub(crate) fn string_to_number(&self) -> f64 {
465        let string = self.trim_matches(is_trimmable_whitespace);
466
467        // TODO: write our own lexer to match syntax StrDecimalLiteral
468        match string {
469            "" => 0.0,
470            "Infinity" | "+Infinity" => f64::INFINITY,
471            "-Infinity" => f64::NEG_INFINITY,
472            _ if matches!(
473                string
474                    .chars()
475                    .take(4)
476                    .collect::<String>()
477                    .to_ascii_lowercase()
478                    .as_str(),
479                "inf" | "+inf" | "-inf" | "nan" | "+nan" | "-nan"
480            ) =>
481            {
482                // Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
483                f64::NAN
484            }
485            _ => fast_float::parse(string).unwrap_or(f64::NAN),
486        }
487    }
488}
489
// Empty impl: `JsString` adds no finalization logic of its own.
impl Finalize for JsString {}

// Safety: [`JsString`] does not contain any objects which require trace,
// so this is safe.
unsafe impl Trace for JsString {
    empty_trace!();
}
497
498impl Clone for JsString {
499    #[inline]
500    fn clone(&self) -> Self {
501        let inner = self.inner();
502        inner.refcount.set(inner.refcount.get() + 1);
503
504        JsString {
505            inner: self.inner,
506            _marker: PhantomData,
507        }
508    }
509}
510
511impl Drop for JsString {
512    #[inline]
513    fn drop(&mut self) {
514        let inner = self.inner();
515        if inner.refcount.get() == 1 {
516            // Safety: If refcount is 1 and we call drop, that means this is the last
517            // JsString which points to this memory allocation, so deallocating it is safe.
518            unsafe {
519                Inner::dealloc(self.inner);
520            }
521        } else {
522            inner.refcount.set(inner.refcount.get() - 1);
523        }
524    }
525}
526
527impl std::fmt::Debug for JsString {
528    #[inline]
529    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
530        self.as_str().fmt(f)
531    }
532}
533
534impl std::fmt::Display for JsString {
535    #[inline]
536    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
537        self.as_str().fmt(f)
538    }
539}
540
541impl From<&str> for JsString {
542    #[inline]
543    fn from(s: &str) -> Self {
544        Self::new(s)
545    }
546}
547
548impl From<Box<str>> for JsString {
549    #[inline]
550    fn from(s: Box<str>) -> Self {
551        Self::new(s)
552    }
553}
554
555impl From<String> for JsString {
556    #[inline]
557    fn from(s: String) -> Self {
558        Self::new(s)
559    }
560}
561
562impl AsRef<str> for JsString {
563    #[inline]
564    fn as_ref(&self) -> &str {
565        self.as_str()
566    }
567}
568
569impl Borrow<str> for JsString {
570    #[inline]
571    fn borrow(&self) -> &str {
572        self.as_str()
573    }
574}
575
576impl Deref for JsString {
577    type Target = str;
578
579    #[inline]
580    fn deref(&self) -> &Self::Target {
581        self.as_str()
582    }
583}
584
585impl PartialEq<JsString> for JsString {
586    #[inline]
587    fn eq(&self, other: &Self) -> bool {
588        // If they point at the same memory allocation, then they are equal.
589        if Self::ptr_eq(self, other) {
590            return true;
591        }
592
593        self.as_str() == other.as_str()
594    }
595}
596
597impl Eq for JsString {}
598
599impl Hash for JsString {
600    #[inline]
601    fn hash<H: Hasher>(&self, state: &mut H) {
602        self.as_str().hash(state);
603    }
604}
605
606impl PartialOrd for JsString {
607    #[inline]
608    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
609        self.as_str().partial_cmp(other.as_str())
610    }
611}
612
613impl Ord for JsString {
614    #[inline]
615    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
616        self.as_str().cmp(other)
617    }
618}
619
620impl PartialEq<str> for JsString {
621    #[inline]
622    fn eq(&self, other: &str) -> bool {
623        self.as_str() == other
624    }
625}
626
627impl PartialEq<JsString> for str {
628    #[inline]
629    fn eq(&self, other: &JsString) -> bool {
630        self == other.as_str()
631    }
632}
633
634impl PartialEq<&str> for JsString {
635    #[inline]
636    fn eq(&self, other: &&str) -> bool {
637        self.as_str() == *other
638    }
639}
640
641impl PartialEq<JsString> for &str {
642    #[inline]
643    fn eq(&self, other: &JsString) -> bool {
644        *self == other.as_str()
645    }
646}
647
#[cfg(test)]
mod tests {
    use super::JsString;
    use std::mem::size_of;

    /// Creating (and dropping) the empty string must not panic.
    #[test]
    fn empty() {
        drop(JsString::new(""));
    }

    /// A `JsString` is one pointer wide, with or without an `Option` around it.
    #[test]
    fn pointer_size() {
        let pointer = size_of::<*const u8>();

        assert_eq!(size_of::<JsString>(), pointer);
        assert_eq!(size_of::<Option<JsString>>(), pointer);
    }

    /// The reference count follows the number of live clones.
    #[test]
    fn refcount() {
        let x = JsString::new("Hello wrold");
        assert_eq!(JsString::refcount(&x), 1);

        let y = x.clone();
        for handle in [&x, &y].iter() {
            assert_eq!(JsString::refcount(handle), 2);
        }

        let z = y.clone();
        for handle in [&x, &y, &z].iter() {
            assert_eq!(JsString::refcount(handle), 3);
        }

        drop(z);
        for handle in [&x, &y].iter() {
            assert_eq!(JsString::refcount(handle), 2);
        }

        drop(y);
        assert_eq!(JsString::refcount(&x), 1);
    }

    /// Clones share an allocation; separately created equal strings do not.
    #[test]
    fn ptr_eq() {
        let x = JsString::new("Hello");
        let y = x.clone();
        let z = JsString::new("Hello");

        assert!(JsString::ptr_eq(&x, &y));
        assert!(!JsString::ptr_eq(&x, &z));
        assert!(!JsString::ptr_eq(&y, &z));
    }

    /// `as_str` returns the text the string was created from.
    #[test]
    fn as_str() {
        let s = "Hello";

        assert_eq!(JsString::new(s).as_str(), s);
    }

    /// A `JsString` hashes like the `str` it was created from.
    #[test]
    fn hash() {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        fn hash_of<T: Hash + ?Sized>(value: &T) -> u64 {
            let mut hasher = DefaultHasher::new();
            value.hash(&mut hasher);
            hasher.finish()
        }

        let s = "Hello, world!";
        let x = JsString::new(s);

        assert_eq!(x.as_str(), s);
        assert_eq!(hash_of(s), hash_of(&x));
    }

    /// Concatenation accepts mixed argument types and yields unshared strings.
    #[test]
    fn concat() {
        let xy = JsString::concat(JsString::new("hello"), ", ");
        assert_eq!(xy, "hello, ");
        assert_eq!(JsString::refcount(&xy), 1);

        let xyz = JsString::concat(xy, JsString::new("world"));
        assert_eq!(xyz, "hello, world");
        assert_eq!(JsString::refcount(&xyz), 1);

        let xyzw = JsString::concat(xyz, String::from("!"));
        assert_eq!(xyzw, "hello, world!");
        assert_eq!(JsString::refcount(&xyzw), 1);
    }
}