#![deny(
unsafe_op_in_unsafe_fn,
clippy::undocumented_unsafe_blocks,
clippy::missing_safety_doc
)]
#![allow(unstable_name_collisions)]
pub(crate) mod common;
use crate::{
builtins::string::is_trimmable_whitespace,
tagged::{Tagged, UnwrappedTagged},
JsBigInt,
};
use boa_gc::{empty_trace, Finalize, Trace};
pub use boa_macros::utf16;
use std::{
alloc::{alloc, dealloc, Layout},
borrow::Borrow,
cell::Cell,
convert::Infallible,
hash::{Hash, Hasher},
iter::Peekable,
ops::{Deref, Index},
process::abort,
ptr::{self, addr_of, addr_of_mut, NonNull},
slice::SliceIndex,
str::FromStr,
};
use self::common::StaticJsStrings;
/// Diverging helper invoked when the size computation for a string allocation overflows.
///
/// # Panics
///
/// Always panics; it never returns.
fn alloc_overflow() -> ! {
    panic!("detected overflow during string allocation")
}
/// Builds a `JsString` from zero or more arguments.
///
/// - no arguments: expands to `JsString::default()`;
/// - one literal: the literal goes through `utf16!` and then `JsString::from`;
/// - one expression: expands to `JsString::from(expr)`;
/// - two expressions: expands to `JsString::concat(x, y)`;
/// - any other comma-separated list: expands to `JsString::concat_array(&[...])`.
///
/// Arm order matters: a literal is also an expression, and a pair also matches
/// the variadic arm, so the more specific arms must come first.
#[macro_export]
macro_rules! js_string {
// No arguments.
() => {
$crate::JsString::default()
};
// A single literal, converted through `utf16!`.
($s:literal) => {
$crate::JsString::from($crate::string::utf16!($s))
};
// A single expression of any type accepted by `JsString::from`.
($s:expr) => {
$crate::JsString::from($s)
};
// Exactly two expressions.
( $x:expr, $y:expr ) => {
$crate::JsString::concat($x, $y)
};
// Any remaining comma-separated list of expressions.
( $( $s:expr ),+ ) => {
$crate::JsString::concat_array(&[ $( $s ),+ ])
};
}
/// A single item decoded from a UTF-16 sequence: either a valid Unicode scalar value or a
/// surrogate code unit that has no matching partner.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CodePoint {
    /// A valid Unicode scalar value.
    Unicode(char),
    /// A surrogate code unit that is not part of a valid surrogate pair.
    UnpairedSurrogate(u16),
}

impl CodePoint {
    /// Returns how many UTF-16 code units are needed to encode this code point.
    ///
    /// An unpaired surrogate always occupies exactly one code unit.
    #[must_use]
    pub const fn code_unit_count(self) -> usize {
        match self {
            Self::UnpairedSurrogate(_) => 1,
            Self::Unicode(scalar) => scalar.len_utf16(),
        }
    }

    /// Returns the numeric value of this code point.
    #[must_use]
    pub fn as_u32(self) -> u32 {
        match self {
            Self::UnpairedSurrogate(unit) => unit.into(),
            Self::Unicode(scalar) => scalar.into(),
        }
    }

    /// Returns the contained `char`, or `None` for an unpaired surrogate.
    #[must_use]
    pub const fn as_char(self) -> Option<char> {
        if let Self::Unicode(scalar) = self {
            Some(scalar)
        } else {
            None
        }
    }

    /// Encodes this code point as UTF-16 into the start of `dst` and returns the sub-slice
    /// that was written.
    ///
    /// # Panics
    ///
    /// Panics if `dst` is too short to hold the encoded code point.
    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
        let unit = match self {
            Self::Unicode(scalar) => return scalar.encode_utf16(dst),
            Self::UnpairedSurrogate(unit) => unit,
        };
        dst[0] = unit;
        &mut dst[..1]
    }
}
/// Header of a heap-allocated string.
///
/// `try_allocate_inner` allocates this header followed by room for `len` `u16` code units;
/// the zero-sized `data` field marks where those code units start. `#[repr(C)]` keeps the
/// fields in declaration order, so `data` sits after `len` and `refcount`.
#[repr(C)]
struct RawJsString {
// Number of `u16` code units stored after the header.
len: usize,
// Number of `JsString` handles sharing this allocation (see `Clone` and `Drop`).
refcount: Cell<usize>,
// Zero-sized marker for the start of the code units.
data: [u16; 0],
}
/// Size in bytes of the `RawJsString` header. `try_allocate_inner` debug-asserts that the
/// code units of an allocation begin at exactly this offset.
const DATA_OFFSET: usize = std::mem::size_of::<RawJsString>();
/// A reference-counted string of UTF-16 code units.
///
/// The single `ptr` field either points at a heap-allocated `RawJsString` (whose reference
/// count is adjusted by `Clone` and `Drop`) or carries a tag that `Deref` resolves through
/// `StaticJsStrings::get`, in which case no reference counting takes place.
#[derive(Finalize)]
pub struct JsString {
ptr: Tagged<RawJsString>,
}
// Compile-time check that a `JsString` is exactly as large as a thin pointer.
sa::assert_eq_size!(JsString, *const ());
// SAFETY: a `JsString` only holds a `Tagged<RawJsString>`, and `RawJsString` consists of a
// length, a reference count and `u16` code units, so there is nothing for the garbage
// collector to trace.
// NOTE(review): assumes `empty_trace!` expands to no-op tracing methods; confirm in `boa_gc`.
unsafe impl Trace for JsString {
empty_trace!();
}
impl JsString {
/// Borrows the contents of this string as a slice of UTF-16 code units.
#[must_use]
pub fn as_slice(&self) -> &[u16] {
    &**self
}
/// Creates a new string holding the code units of `x` followed by those of `y`.
///
/// This is a convenience wrapper around [`JsString::concat_array`].
#[must_use]
pub fn concat(x: &[u16], y: &[u16]) -> Self {
    let parts: [&[u16]; 2] = [x, y];
    Self::concat_array(&parts)
}
/// Creates a new string holding the code units of every slice in `strings`, in order.
///
/// If the concatenated contents match an entry known to `StaticJsStrings::get_string`, that
/// entry is returned and the freshly built allocation is dropped.
///
/// # Panics
///
/// Panics if the combined length of all slices overflows `usize`. Allocation failures are
/// reported through `allocate_inner`.
#[must_use]
pub fn concat_array(strings: &[&[u16]]) -> Self {
    // Sum the lengths up front so one allocation fits everything.
    let mut full_count = 0usize;
    for &string in strings {
        let Some(sum) = full_count.checked_add(string.len()) else {
            alloc_overflow()
        };
        full_count = sum;
    }

    let ptr = Self::allocate_inner(full_count);

    // SAFETY: `allocate_inner` returns a non-null pointer to an initialized `RawJsString`
    // header, so taking the address of its `data` field is valid.
    let mut data = unsafe { addr_of_mut!((*ptr.as_ptr()).data).cast::<u16>() };
    for string in strings {
        let count = string.len();
        // SAFETY: the allocation has room for `full_count` code units after the header and
        // the counts copied here add up to exactly `full_count`, so every write and every
        // pointer advance stays inside the allocation. The source slices are borrowed and the
        // destination was just allocated, so the two regions cannot overlap.
        unsafe {
            ptr::copy_nonoverlapping(string.as_ptr(), data, count);
            data = data.add(count);
        }
    }

    // `ptr` is already a `NonNull`, so use the safe constructor, matching
    // `from_slice_skip_interning`.
    let string = Self {
        ptr: Tagged::from_non_null(ptr),
    };

    StaticJsStrings::get_string(&string[..]).unwrap_or(string)
}
/// Decodes this string into a [`String`], writing every unpaired surrogate as a `\uXXXX`
/// escape (see the [`ToStringEscaped`] implementation for `[u16]`).
#[must_use]
pub fn to_std_string_escaped(&self) -> String {
    self.as_slice().to_string_escaped()
}
/// Decodes this string into a [`String`].
///
/// # Errors
///
/// Returns [`FromUtf16Error`][std::string::FromUtf16Error] if the string contains an
/// unpaired surrogate.
pub fn to_std_string(&self) -> Result<String, std::string::FromUtf16Error> {
    String::from_utf16(self.as_slice())
}
/// Splits this string into maximal runs of valid Unicode and individual unpaired surrogates.
///
/// Each `Ok(String)` holds a run of consecutive valid code points; each `Err(u16)` holds one
/// unpaired surrogate. Items are yielded in the order they appear in the string.
pub fn to_std_string_with_surrogates(&self) -> impl Iterator<Item = Result<String, u16>> + '_ {
    /// Groups consecutive valid code points into `String` segments.
    struct Segments<I: Iterator> {
        inner: Peekable<I>,
    }

    impl<I> Iterator for Segments<I>
    where
        I: Iterator<Item = CodePoint>,
    {
        type Item = Result<String, u16>;

        fn next(&mut self) -> Option<Self::Item> {
            // An unpaired surrogate is reported alone.
            let first = match self.inner.next()? {
                CodePoint::UnpairedSurrogate(surr) => return Some(Err(surr)),
                CodePoint::Unicode(c) => c,
            };

            // Otherwise keep consuming while the next code point is valid too.
            let mut segment = String::from(first);
            while let Some(&CodePoint::Unicode(c)) = self.inner.peek() {
                segment.push(c);
                self.inner
                    .next()
                    .expect("should exist by the check above");
            }
            Some(Ok(segment))
        }
    }

    Segments {
        inner: self.code_points().peekable(),
    }
}
#[must_use]
pub fn map_valid_segments<F>(&self, mut f: F) -> Self
where
F: FnMut(String) -> String,
{
let mut text = Vec::new();
for part in self.to_std_string_with_surrogates() {
match part {
Ok(string) => text.extend(f(string).encode_utf16()),
Err(surr) => text.push(surr),
}
}
js_string!(text)
}
/// Returns an iterator over the [`CodePoint`]s of this string.
///
/// Valid surrogate pairs are combined into a single [`CodePoint::Unicode`]; a surrogate
/// without a partner is yielded as [`CodePoint::UnpairedSurrogate`].
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
    char::decode_utf16(self.iter().copied()).map(|decoded| {
        decoded.map_or_else(
            |err| CodePoint::UnpairedSurrogate(err.unpaired_surrogate()),
            CodePoint::Unicode,
        )
    })
}
/// Finds the first occurrence of `search_value` that starts at or after `from_index`.
///
/// Returns the index of the first code unit of the match, or `None` when there is none.
/// An empty `search_value` matches at `from_index` as long as `from_index` does not exceed
/// the length of the string.
pub(crate) fn index_of(&self, search_value: &[u16], from_index: usize) -> Option<usize> {
    let needle_len = search_value.len();
    if needle_len == 0 {
        return (from_index <= self.len()).then_some(from_index);
    }

    // `enumerate` before `skip` keeps the reported index relative to the whole string.
    self.windows(needle_len)
        .enumerate()
        .skip(from_index)
        .find(|&(_, candidate)| candidate == search_value)
        .map(|(start, _)| start)
}
/// Decodes the code point that starts at code unit index `position`.
///
/// At most two code units are inspected: a valid surrogate pair starting at `position`
/// yields one [`CodePoint::Unicode`], while a lone surrogate yields
/// [`CodePoint::UnpairedSurrogate`].
///
/// # Panics
///
/// Panics if `position` is not smaller than the length of the string.
pub(crate) fn code_point_at(&self, position: usize) -> CodePoint {
    let size = self.len();
    assert!(position < size);

    // Take two code units when a second one exists, otherwise only the last one.
    let end = if position + 1 < size {
        position + 2
    } else {
        position + 1
    };
    let units = &self[position..end];

    let decoded = char::decode_utf16(units.iter().copied())
        .next()
        .expect("code_point always has a value");
    match decoded {
        Err(e) => CodePoint::UnpairedSurrogate(e.unpaired_surrogate()),
        Ok(c) => CodePoint::Unicode(c),
    }
}
/// Converts this string to a number.
///
/// Leading and trailing characters accepted by `is_trimmable_whitespace` are ignored. The
/// result is:
/// - `NaN` if the string contains an unpaired surrogate;
/// - `0` for an empty (or all-whitespace) string;
/// - `±Infinity` only for the exact spellings `Infinity`, `+Infinity` and `-Infinity`;
/// - the value of the digits after a `0b`/`0o`/`0x` prefix (either case), or `NaN` if no
///   digits follow or a non-digit (including a sign) appears;
/// - otherwise whatever `fast_float::parse` produces, or `NaN` when it fails.
pub(crate) fn to_number(&self) -> f64 {
    let Ok(string) = self.to_std_string() else {
        return f64::NAN;
    };

    let string = string.trim_matches(is_trimmable_whitespace);
    match string {
        "" => return 0.0,
        "-Infinity" => return f64::NEG_INFINITY,
        "Infinity" | "+Infinity" => return f64::INFINITY,
        _ => {}
    }

    let mut s = string.bytes();
    let base = match (s.next(), s.next()) {
        (Some(b'0'), Some(b'b' | b'B')) => Some(2),
        (Some(b'0'), Some(b'o' | b'O')) => Some(8),
        (Some(b'0'), Some(b'x' | b'X')) => Some(16),
        // The only valid infinity spellings were handled above. `fast_float` would also
        // accept "inf"/"infinity" in any case, with or without a sign, so reject every
        // remaining input that starts that way.
        (Some(b'i' | b'I'), _) | (Some(b'+' | b'-'), Some(b'i' | b'I')) => {
            return f64::NAN;
        }
        _ => None,
    };

    if let Some(base) = base {
        // The prefix is two ASCII bytes, so byte index 2 is a char boundary.
        let digits = &string[2..];
        if digits.is_empty() {
            return f64::NAN;
        }

        // Fast path for values that fit in a `u32`. `from_str_radix` tolerates a leading
        // `+`, which is not valid after a radix prefix, so such input goes to the slow
        // path where the sign is rejected.
        if !digits.starts_with('+') {
            if let Ok(value) = u32::from_str_radix(digits, base) {
                return f64::from(value);
            }
        }

        // Slow path: accumulate digit by digit. `s` has already consumed the prefix.
        let mut value: f64 = 0.0;
        for c in s {
            if let Some(digit) = char::from(c).to_digit(base) {
                value = value.mul_add(f64::from(base), f64::from(digit));
            } else {
                return f64::NAN;
            }
        }
        return value;
    }

    fast_float::parse(string).unwrap_or(f64::NAN)
}
/// Converts this string to a `JsBigInt` via `JsBigInt::from_string`.
///
/// Returns `None` if the string contains an unpaired surrogate or if
/// `JsBigInt::from_string` returns `None`.
pub(crate) fn to_big_int(&self) -> Option<JsBigInt> {
    let string = self.to_std_string().ok()?;
    JsBigInt::from_string(&string)
}
/// Allocates a `RawJsString` with room for `str_len` code units, diverging on failure.
///
/// # Panics
///
/// Panics through `alloc_overflow` if the layout cannot be computed, and calls
/// `std::alloc::handle_alloc_error` if the allocator returns null.
fn allocate_inner(str_len: usize) -> NonNull<RawJsString> {
    let failure = match Self::try_allocate_inner(str_len) {
        Ok(ptr) => return ptr,
        Err(failure) => failure,
    };

    // `None` means the layout overflowed; `Some(layout)` means the allocator failed.
    let Some(layout) = failure else {
        alloc_overflow()
    };
    std::alloc::handle_alloc_error(layout)
}
/// Allocates a `RawJsString` header followed by room for `str_len` code units.
///
/// The header is initialized with `len = str_len` and a reference count of 1. The code
/// units themselves are NOT initialized; callers must write all `str_len` units.
///
/// # Errors
///
/// Returns `Err(None)` if the layout computation fails and `Err(Some(layout))` if the
/// allocator returns a null pointer for `layout`.
fn try_allocate_inner(str_len: usize) -> Result<NonNull<RawJsString>, Option<Layout>> {
// Header layout extended by an array of `str_len` code units, padded to its alignment.
let (layout, offset) = Layout::array::<u16>(str_len)
.and_then(|arr| Layout::new::<RawJsString>().extend(arr))
.map(|(layout, offset)| (layout.pad_to_align(), offset))
.map_err(|_| None)?;
debug_assert_eq!(offset, DATA_OFFSET);
#[allow(clippy::cast_ptr_alignment)]
// SAFETY: `layout` always includes the `RawJsString` header, which contains two
// `usize`-sized fields, so its size is never zero.
let inner = unsafe { alloc(layout).cast::<RawJsString>() };
// A null pointer signals allocation failure; report the layout to the caller.
let inner = NonNull::new(inner).ok_or(Some(layout))?;
// SAFETY: `inner` is non-null and was allocated with a layout that starts with the layout
// of `RawJsString`, so it is properly aligned and valid for writing the header.
unsafe {
inner.as_ptr().write(RawJsString {
len: str_len,
refcount: Cell::new(1),
data: [0; 0],
});
}
// Debug-only check that the `data` field really sits `offset` bytes into the allocation.
debug_assert!({
let inner = inner.as_ptr();
// SAFETY: the header behind `inner` was initialized above and `offset` lies within the
// allocation, so both the field access and the pointer addition are in bounds.
unsafe {
ptr::eq(
inner.cast::<u8>().add(offset).cast(),
(*inner).data.as_mut_ptr(),
)
}
});
Ok(inner)
}
/// Creates a heap-allocated `JsString` holding a copy of `string`, without consulting
/// `StaticJsStrings` first.
fn from_slice_skip_interning(string: &[u16]) -> Self {
let count = string.len();
let ptr = Self::allocate_inner(count);
// SAFETY: `allocate_inner` returns a non-null pointer to an initialized header, so taking
// the address of its `data` field is valid.
let data = unsafe { addr_of_mut!((*ptr.as_ptr()).data) };
// SAFETY: the allocation has room for `count` code units after the header, `string` is
// valid for reading `count` units, and the freshly allocated destination cannot overlap
// the borrowed source.
unsafe {
ptr::copy_nonoverlapping(string.as_ptr(), data.cast(), count);
}
Self {
ptr: Tagged::from_non_null(ptr),
}
}
}
/// Exposes the UTF-16 code units of the string.
impl AsRef<[u16]> for JsString {
    fn as_ref(&self) -> &[u16] {
        self.as_slice()
    }
}
/// Borrows the string as its UTF-16 code units.
impl Borrow<[u16]> for JsString {
    fn borrow(&self) -> &[u16] {
        self.as_slice()
    }
}
/// Cloning a heap-allocated string only bumps its reference count; the code units are
/// shared, not copied. Tagged strings carry no reference count and are copied as-is.
impl Clone for JsString {
#[inline]
fn clone(&self) -> Self {
if let UnwrappedTagged::Ptr(inner) = self.ptr.unwrap() {
// SAFETY: `self` is alive, so its reference count is at least 1, and `Drop` only frees
// the allocation once the count reaches 0. The pointer is therefore still valid.
let inner = unsafe { inner.as_ref() };
let strong = inner.refcount.get().wrapping_add(1);
// A wrapped count would later cause a premature free, so abort instead of continuing.
if strong == 0 {
abort()
}
inner.refcount.set(strong);
}
Self { ptr: self.ptr }
}
}
/// The default `JsString` is whatever `StaticJsStrings::empty_string()` returns.
impl Default for JsString {
#[inline]
fn default() -> Self {
StaticJsStrings::empty_string()
}
}
/// Dropping a heap-allocated string decrements its reference count and frees the allocation
/// when the count reaches zero. Tagged strings own no allocation, so nothing happens.
impl Drop for JsString {
fn drop(&mut self) {
if let UnwrappedTagged::Ptr(raw) = self.ptr.unwrap() {
// SAFETY: `self` still holds one reference, so the allocation has not been freed and
// the pointer is valid.
let inner = unsafe { raw.as_ref() };
inner.refcount.set(inner.refcount.get() - 1);
// Other handles still share the allocation; leave it alone.
if inner.refcount.get() != 0 {
return;
}
// SAFETY: this repeats the layout computation of `try_allocate_inner` for the same
// `len`. It succeeded when the string was allocated, so it cannot fail here.
let layout = unsafe {
Layout::for_value(inner)
.extend(Layout::array::<u16>(inner.len).unwrap_unchecked())
.unwrap_unchecked()
.0
.pad_to_align()
};
// SAFETY: the reference count just reached 0, so this was the last handle to the
// allocation, and `layout` matches the one it was allocated with.
unsafe {
dealloc(raw.as_ptr().cast(), layout);
}
}
}
}
/// Formats the string as the `Debug` representation of a [`String`] in which every unpaired
/// surrogate has been replaced by `<0xXXXX>` (lowercase hexadecimal, four digits).
impl std::fmt::Debug for JsString {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Decode straight from the borrowed code units; copying them into a temporary
        // `Vec` first is unnecessary.
        let rendered: String = char::decode_utf16(self.iter().copied())
            .map(|unit| {
                unit.map_or_else(
                    |err| format!("<0x{:04x}>", err.unpaired_surrogate()),
                    String::from,
                )
            })
            .collect();

        std::fmt::Debug::fmt(&rendered, f)
    }
}
/// Dereferences to the UTF-16 code units of the string, whether they live in a heap
/// allocation or are looked up through `StaticJsStrings`.
impl Deref for JsString {
type Target = [u16];
fn deref(&self) -> &Self::Target {
match self.ptr.unwrap() {
UnwrappedTagged::Ptr(h) => {
// SAFETY: `self` keeps the allocation alive. The header stores `len`, and the
// constructors in this file copy exactly that many code units into place right
// after the header, so the slice covers initialized memory inside the allocation.
unsafe {
let h = h.as_ptr();
std::slice::from_raw_parts(addr_of!((*h).data).cast(), (*h).len)
}
}
UnwrappedTagged::Tag(index) => {
// SAFETY: NOTE(review): relies on every tag stored in a `JsString` being an index
// for which `StaticJsStrings::get` returns `Some`; confirm where tags are created.
unsafe { StaticJsStrings::get(index).unwrap_unchecked() }
}
}
}
}
// Equality compares `u16` code units (see `PartialEq`), which is a total equivalence relation.
impl Eq for JsString {}
/// Builds a string from UTF-16 code units, reusing the `StaticJsStrings` entry when
/// `StaticJsStrings::get_string` finds one and allocating a copy otherwise.
impl From<&[u16]> for JsString {
    fn from(s: &[u16]) -> Self {
        match StaticJsStrings::get_string(s) {
            Some(interned) => interned,
            None => Self::from_slice_skip_interning(s),
        }
    }
}
/// Builds a string from an owned vector of UTF-16 code units by delegating to the
/// `&[u16]` conversion.
impl From<Vec<u16>> for JsString {
    fn from(vec: Vec<u16>) -> Self {
        Self::from(vec.as_slice())
    }
}
/// Builds a string by encoding `s` as UTF-16 and delegating to the `&[u16]` conversion.
impl From<&str> for JsString {
    #[inline]
    fn from(s: &str) -> Self {
        let units: Vec<u16> = s.encode_utf16().collect();
        Self::from(units.as_slice())
    }
}
/// Builds a string from an owned [`String`] by delegating to the `&str` conversion.
impl From<String> for JsString {
    #[inline]
    fn from(s: String) -> Self {
        let borrowed: &str = &s;
        Self::from(borrowed)
    }
}
/// Builds a string from a fixed-size array of UTF-16 code units by delegating to the
/// `&[u16]` conversion.
impl<const N: usize> From<&[u16; N]> for JsString {
    fn from(s: &[u16; N]) -> Self {
        Self::from(s.as_slice())
    }
}
/// Hashes exactly like the underlying `[u16]` slice, keeping `Hash` consistent with the
/// `Borrow<[u16]>` implementation.
impl Hash for JsString {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_slice().hash(state);
    }
}
/// Indexes into the UTF-16 code units with any index type accepted by `[u16]`.
impl<I: SliceIndex<[u16]>> Index<I> for JsString {
    type Output = I::Output;

    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_slice()[index]
    }
}
/// Orders strings lexicographically by their UTF-16 code units.
impl Ord for JsString {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.as_slice().cmp(other.as_slice())
    }
}
/// Two strings are equal when their UTF-16 code units are equal.
impl PartialEq for JsString {
    fn eq(&self, other: &Self) -> bool {
        self.as_slice() == other.as_slice()
    }
}
/// Compares a slice of code units with the code units of a `JsString`.
impl PartialEq<JsString> for [u16] {
    fn eq(&self, other: &JsString) -> bool {
        self == other.as_slice()
    }
}
/// Compares an array of code units with the code units of a `JsString`.
impl<const N: usize> PartialEq<JsString> for [u16; N] {
    fn eq(&self, other: &JsString) -> bool {
        self.as_slice() == other.as_slice()
    }
}
/// Compares the code units of a `JsString` with a slice of code units.
impl PartialEq<[u16]> for JsString {
    fn eq(&self, other: &[u16]) -> bool {
        self.as_slice() == other
    }
}
impl<const N: usize> PartialEq<[u16; N]> for JsString {
fn eq(&self, other: &[u16; N]) -> bool {
*self == other[..]
}
}
/// Compares a `JsString` with a `str` code point by code point, without allocating.
///
/// A string containing an unpaired surrogate never equals a `str`.
impl PartialEq<str> for JsString {
    fn eq(&self, other: &str) -> bool {
        let mut chars = other.chars();

        for unit in self.code_points() {
            // Every code point must be valid Unicode and match the next `char` of `other`;
            // running out of `char`s early is a mismatch as well.
            let same = matches!(
                (unit, chars.next()),
                (CodePoint::Unicode(lhs), Some(rhs)) if lhs == rhs
            );
            if !same {
                return false;
            }
        }

        // Equal only if `other` has no characters left over.
        chars.next().is_none()
    }
}
/// Symmetric counterpart of `JsString: PartialEq<str>`.
impl PartialEq<JsString> for str {
    fn eq(&self, other: &JsString) -> bool {
        <JsString as PartialEq<str>>::eq(other, self)
    }
}
/// Strings are totally ordered, so this always returns `Some` with the result of [`Ord::cmp`].
impl PartialOrd for JsString {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
/// Parsing a `JsString` from a `str` cannot fail; it is the same as the `&str` conversion.
impl FromStr for JsString {
    type Err = Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(s.into())
    }
}
pub(crate) trait Utf16Trim {
fn trim(&self) -> &Self {
self.trim_start().trim_end()
}
fn trim_start(&self) -> &Self;
fn trim_end(&self) -> &Self;
}
impl Utf16Trim for [u16] {
fn trim_start(&self) -> &Self {
if let Some(left) = self.iter().copied().position(|r| {
!char::from_u32(u32::from(r))
.map(is_trimmable_whitespace)
.unwrap_or_default()
}) {
&self[left..]
} else {
&[]
}
}
fn trim_end(&self) -> &Self {
if let Some(right) = self.iter().copied().rposition(|r| {
!char::from_u32(u32::from(r))
.map(is_trimmable_whitespace)
.unwrap_or_default()
}) {
&self[..=right]
} else {
&[]
}
}
}
/// Lossless conversion of UTF-16 code units into a [`String`].
pub(crate) trait ToStringEscaped {
    /// Decodes `self` into a [`String`], writing each unpaired surrogate as a `\uXXXX`
    /// escape with four uppercase hexadecimal digits.
    fn to_string_escaped(&self) -> String;
}

impl ToStringEscaped for [u16] {
    fn to_string_escaped(&self) -> String {
        let mut escaped = String::new();
        for decoded in char::decode_utf16(self.iter().copied()) {
            match decoded {
                Ok(c) => escaped.push(c),
                Err(e) => escaped.push_str(&format!("\\u{:04X}", e.unpaired_surrogate())),
            }
        }
        escaped
    }
}
// The clones in these tests are intentional: they exist to observe reference counting.
#[allow(clippy::redundant_clone)]
#[cfg(test)]
mod tests {
use crate::tagged::UnwrappedTagged;
use super::utf16;
use super::JsString;
impl JsString {
/// Test-only accessor: the current reference count of a heap-allocated string, or
/// `None` for a tagged string.
fn refcount(&self) -> Option<usize> {
match self.ptr.unwrap() {
UnwrappedTagged::Ptr(inner) => {
// SAFETY: `self` is alive, so the allocation behind `inner` has not been freed.
let inner = unsafe { inner.as_ref() };
Some(inner.refcount.get())
}
UnwrappedTagged::Tag(_inner) => None,
}
}
}
// The default string contains no code units.
#[test]
fn empty() {
let s = js_string!();
assert_eq!(*s, "".encode_utf16().collect::<Vec<u16>>());
}
// Cloning increments the shared count and dropping a clone decrements it again.
#[test]
fn refcount() {
let x = js_string!("Hello world");
assert_eq!(x.refcount(), Some(1));
{
let y = x.clone();
assert_eq!(x.refcount(), Some(2));
assert_eq!(y.refcount(), Some(2));
{
let z = y.clone();
assert_eq!(x.refcount(), Some(3));
assert_eq!(y.refcount(), Some(3));
assert_eq!(z.refcount(), Some(3));
}
assert_eq!(x.refcount(), Some(2));
assert_eq!(y.refcount(), Some(2));
}
assert_eq!(x.refcount(), Some(1));
}
// The default (tagged) string has no reference count, before or after cloning.
#[test]
fn static_refcount() {
let x = js_string!();
assert_eq!(x.refcount(), None);
{
let y = x.clone();
assert_eq!(x.refcount(), None);
assert_eq!(y.refcount(), None);
};
assert_eq!(x.refcount(), None);
}
// A clone shares the allocation; a separately built equal string gets its own.
#[test]
fn ptr_eq() {
let x = js_string!("Hello");
let y = x.clone();
assert!(!x.ptr.is_tagged());
assert_eq!(x.ptr.addr(), y.ptr.addr());
let z = js_string!("Hello");
assert_ne!(x.ptr.addr(), z.ptr.addr());
assert_ne!(y.ptr.addr(), z.ptr.addr());
}
// Every default string is tagged and carries the same address value.
#[test]
fn static_ptr_eq() {
let x = js_string!();
let y = x.clone();
assert!(x.ptr.is_tagged());
assert_eq!(x.ptr.addr(), y.ptr.addr());
let z = js_string!();
assert_eq!(x.ptr.addr(), z.ptr.addr());
assert_eq!(y.ptr.addr(), z.ptr.addr());
}
// Converting from `&str` stores the UTF-16 encoding of the input.
#[test]
fn as_str() {
const HELLO: &str = "Hello";
let x = js_string!(HELLO);
assert_eq!(*x, HELLO.encode_utf16().collect::<Vec<u16>>());
}
// A `JsString` hashes identically to the `[u16]` slice it holds.
#[test]
fn hash() {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
const HELLOWORLD: &[u16] = utf16!("Hello World!");
let x = js_string!(HELLOWORLD);
assert_eq!(&*x, HELLOWORLD);
let mut hasher = DefaultHasher::new();
HELLOWORLD.hash(&mut hasher);
let s_hash = hasher.finish();
let mut hasher = DefaultHasher::new();
x.hash(&mut hasher);
let x_hash = hasher.finish();
assert_eq!(s_hash, x_hash);
}
// The two-argument macro form concatenates into a fresh allocation with a count of 1.
#[test]
fn concat() {
const Y: &[u16] = utf16!(", ");
const W: &[u16] = utf16!("!");
let x = js_string!("hello");
let z = js_string!("world");
let xy = js_string!(&x, Y);
assert_eq!(&xy, utf16!("hello, "));
assert_eq!(xy.refcount(), Some(1));
let xyz = js_string!(&xy, &z);
assert_eq!(&xyz, utf16!("hello, world"));
assert_eq!(xyz.refcount(), Some(1));
let xyzw = js_string!(&xyz, W);
assert_eq!(&xyzw, utf16!("hello, world!"));
assert_eq!(xyzw.refcount(), Some(1));
}
}