#![deny(
unsafe_op_in_unsafe_fn,
clippy::undocumented_unsafe_blocks,
clippy::missing_safety_doc
)]
#![allow(clippy::module_name_repetitions)]
mod builder;
mod common;
mod display;
mod iter;
mod str;
#[cfg(test)]
mod tests;
use self::{iter::Windows, str::JsSliceIndex};
use crate::display::{JsStrDisplayEscaped, JsStrDisplayLossy};
#[doc(inline)]
pub use crate::{
builder::{CommonJsStringBuilder, Latin1JsStringBuilder, Utf16JsStringBuilder},
common::StaticJsStrings,
iter::Iter,
str::{JsStr, JsStrVariant},
};
use std::fmt::Write;
use std::{
alloc::{Layout, alloc, dealloc},
cell::Cell,
convert::Infallible,
hash::{Hash, Hasher},
process::abort,
ptr::{self, NonNull},
str::FromStr,
};
use std::{borrow::Cow, mem::ManuallyDrop};
/// Panics to report that the requested string length cannot be represented.
///
/// This is only reached on the failure path of string allocation, so it is
/// marked `#[cold]` to keep it out of the callers' hot code layout.
///
/// # Panics
///
/// Always panics with `"detected overflow during string allocation"`.
#[cold]
fn alloc_overflow() -> ! {
    panic!("detected overflow during string allocation")
}
/// Returns `true` when `c` is one of the code points treated as trimmable.
///
/// The accepted set is the union of two groups: horizontal white space
/// (tab, vertical tab, form feed, space, no-break space, the space
/// separators `U+1680`, `U+2000..=U+200A`, `U+202F`, `U+205F`, `U+3000`,
/// and `U+FEFF`) and the line terminators (`U+000A`, `U+000D`, `U+2028`,
/// `U+2029`).
pub(crate) const fn is_trimmable_whitespace(c: char) -> bool {
    let is_white_space = matches!(
        c,
        '\u{0009}'
            | '\u{000B}'
            | '\u{000C}'
            | '\u{0020}'
            | '\u{00A0}'
            | '\u{1680}'
            | '\u{2000}'..='\u{200A}'
            | '\u{202F}'
            | '\u{205F}'
            | '\u{3000}'
            | '\u{FEFF}'
    );
    let is_line_terminator = matches!(c, '\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}');

    is_white_space || is_line_terminator
}
/// Returns `true` when the Latin1 byte `c` is treated as trimmable.
///
/// The accepted bytes are `0x09..=0x0D` (tab, line feed, vertical tab,
/// form feed, carriage return), `0x20` (space) and `0xA0` (no-break space).
pub(crate) const fn is_trimmable_whitespace_latin1(c: u8) -> bool {
    match c {
        0x09..=0x0D | 0x20 | 0xA0 => true,
        _ => false,
    }
}
/// One code point read from a string: either a valid Unicode scalar value or
/// a surrogate code unit that has no partner.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CodePoint {
    /// A valid Unicode scalar value.
    Unicode(char),
    /// A lone surrogate code unit, stored as-is.
    UnpairedSurrogate(u16),
}

impl CodePoint {
    /// Number of UTF-16 code units needed to encode this code point.
    ///
    /// An unpaired surrogate always occupies exactly one code unit.
    #[inline]
    #[must_use]
    pub const fn code_unit_count(self) -> usize {
        if let Self::Unicode(scalar) = self {
            scalar.len_utf16()
        } else {
            1
        }
    }

    /// Numeric value of the code point (or of the lone surrogate) as a `u32`.
    #[inline]
    #[must_use]
    pub fn as_u32(self) -> u32 {
        match self {
            Self::Unicode(scalar) => scalar.into(),
            Self::UnpairedSurrogate(unit) => unit.into(),
        }
    }

    /// Returns the scalar value as a `char`, or `None` for an unpaired surrogate.
    #[inline]
    #[must_use]
    pub const fn as_char(self) -> Option<char> {
        if let Self::Unicode(scalar) = self {
            Some(scalar)
        } else {
            None
        }
    }

    /// Encodes the code point as UTF-16 into `dst` and returns the written prefix.
    ///
    /// # Panics
    ///
    /// Panics if `dst` is too small to hold the encoded code units.
    #[inline]
    #[must_use]
    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
        match self {
            Self::Unicode(scalar) => scalar.encode_utf16(dst),
            Self::UnpairedSurrogate(unit) => {
                // Indexing keeps the bounds-check panic for an empty buffer.
                let slot = &mut dst[0];
                *slot = unit;
                std::slice::from_mut(slot)
            }
        }
    }
}

impl std::fmt::Display for CodePoint {
    /// Writes the scalar value itself, or `\uXXXX` (upper-case hex, padded to
    /// four digits) for an unpaired surrogate.
    #[inline]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::Unicode(scalar) => f.write_char(scalar),
            Self::UnpairedSurrogate(c) => write!(f, "\\u{c:04X}"),
        }
    }
}
/// A string length packed into one `usize` together with an encoding flag.
///
/// The lowest bit records whether the data is Latin1; the remaining bits hold
/// the length.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
struct TaggedLen(usize);

impl TaggedLen {
    /// Bit set when the string is Latin1-encoded.
    const LATIN1_BITFLAG: usize = 1 << 0;
    /// Number of low bits reserved for flags.
    const BITFLAG_COUNT: usize = 1;

    /// Packs `len` and the `latin1` flag into a single word.
    const fn new(len: usize, latin1: bool) -> Self {
        let flag = if latin1 { Self::LATIN1_BITFLAG } else { 0 };
        Self((len << Self::BITFLAG_COUNT) | flag)
    }

    /// Whether the Latin1 flag bit is set.
    const fn is_latin1(self) -> bool {
        let Self(bits) = self;
        (bits & Self::LATIN1_BITFLAG) == Self::LATIN1_BITFLAG
    }

    /// The stored length, with the flag bits shifted out.
    const fn len(self) -> usize {
        let Self(bits) = self;
        bits >> Self::BITFLAG_COUNT
    }
}
#[repr(C)]
#[allow(missing_debug_implementations)]
pub struct RawJsString {
tagged_len: TaggedLen,
refcount: Cell<usize>,
data: [u8; 0],
}
impl RawJsString {
const fn is_latin1(&self) -> bool {
self.tagged_len.is_latin1()
}
const fn len(&self) -> usize {
self.tagged_len.len()
}
}
// Size of the `RawJsString` header in bytes. `try_allocate_inner` debug-asserts
// that the character data of a heap string starts at exactly this offset.
const DATA_OFFSET: usize = size_of::<RawJsString>();
/// The two representations a `JsString` pointer can decode to, as produced by
/// `JsString::unwrap`.
enum Unwrapped<'a> {
/// Pointer to a heap-allocated, reference-counted `RawJsString`.
Heap(NonNull<RawJsString>),
/// Reference to a `'static` `JsStr` (the pointer carried the static tag).
Static(&'a JsStr<'static>),
}
/// An owned string handle that is exactly one pointer wide.
///
/// `ptr` either points at a heap-allocated `RawJsString`, or (when its lowest
/// bit is set, see `is_static`) is the address of a `'static` `JsStr` plus one.
#[allow(clippy::module_name_repetitions)]
pub struct JsString {
// Heap pointer, or tagged pointer to a static `JsStr`; decode via `unwrap`.
ptr: NonNull<RawJsString>,
}
// `JsString` must stay pointer-sized.
static_assertions::assert_eq_size!(JsString, *const ());
impl<'a> From<&'a JsString> for JsStr<'a> {
#[inline]
fn from(value: &'a JsString) -> Self {
value.as_str()
}
}
impl<'a> IntoIterator for &'a JsString {
type IntoIter = Iter<'a>;
type Item = u16;
#[inline]
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
// Convenience API: almost every method borrows the string with `as_str` and
// forwards to the method of the same name on `JsStr`.
impl JsString {
/// Returns an iterator over the `u16` code units of the string.
#[inline]
#[must_use]
pub fn iter(&self) -> Iter<'_> {
self.as_str().iter()
}
/// Returns a `Windows` iterator for the given window `size`; forwards to
/// `JsStr::windows`.
#[inline]
#[must_use]
pub fn windows(&self, size: usize) -> Windows<'_> {
self.as_str().windows(size)
}
/// Formats the string through `display_escaped` and collects the output
/// into a `String`.
#[inline]
#[must_use]
pub fn to_std_string_escaped(&self) -> String {
self.display_escaped().to_string()
}
/// Formats the string through `display_lossy` and collects the output into
/// a `String`.
#[inline]
#[must_use]
pub fn to_std_string_lossy(&self) -> String {
self.display_lossy().to_string()
}
/// Converts the string into a `String`.
///
/// # Errors
///
/// Returns the `FromUtf16Error` reported by `JsStr::to_std_string`.
#[inline]
pub fn to_std_string(&self) -> Result<String, std::string::FromUtf16Error> {
self.as_str().to_std_string()
}
/// Iterates over the string as a sequence of `Ok(String)` segments and
/// `Err(u16)` code units; forwards to `JsStr::to_std_string_with_surrogates`.
// NOTE(review): presumably `Err` carries unpaired surrogates — confirm in `JsStr`.
#[inline]
pub fn to_std_string_with_surrogates(&self) -> impl Iterator<Item = Result<String, u16>> + '_ {
self.as_str().to_std_string_with_surrogates()
}
/// Builds a new string by applying `f` to every `Ok` segment yielded by
/// `to_std_string_with_surrogates`, while copying `Err` code units through
/// unchanged.
#[inline]
#[must_use]
pub fn map_valid_segments<F>(&self, mut f: F) -> Self
where
F: FnMut(String) -> String,
{
// UTF-16 buffer for the result.
let mut text = Vec::new();
for part in self.to_std_string_with_surrogates() {
match part {
// Transform the segment and re-encode it as UTF-16.
Ok(string) => text.extend(f(string).encode_utf16()),
// Keep the raw code unit as it is.
Err(surr) => text.push(surr),
}
}
Self::from(&text[..])
}
/// Returns an iterator over the `CodePoint`s of the string.
#[inline]
pub fn code_points(&self) -> impl Iterator<Item = CodePoint> + Clone + '_ {
self.as_str().code_points()
}
/// Searches for `search_value` starting at `from_index`; forwards to
/// `JsStr::index_of`.
#[inline]
#[must_use]
pub fn index_of(&self, search_value: JsStr<'_>, from_index: usize) -> Option<usize> {
self.as_str().index_of(search_value, from_index)
}
/// Returns the `CodePoint` at `position`; forwards to `JsStr::code_point_at`.
#[inline]
#[must_use]
pub fn code_point_at(&self, position: usize) -> CodePoint {
self.as_str().code_point_at(position)
}
/// Converts the string to an `f64`; forwards to `JsStr::to_number`.
#[inline]
#[must_use]
pub fn to_number(&self) -> f64 {
self.as_str().to_number()
}
/// Returns the length of the string as reported by `JsStr::len`.
#[inline]
#[must_use]
pub fn len(&self) -> usize {
self.as_str().len()
}
/// Returns `true` when `len()` is zero.
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Copies the string into a `Vec<u16>`; forwards to `JsStr::to_vec`.
#[inline]
#[must_use]
pub fn to_vec(&self) -> Vec<u16> {
self.as_str().to_vec()
}
/// Checks whether the string contains `element`; forwards to
/// `JsStr::contains`.
#[inline]
#[must_use]
pub fn contains(&self, element: u8) -> bool {
self.as_str().contains(element)
}
/// Returns a trimmed view of the string; forwards to `JsStr::trim`.
#[inline]
#[must_use]
pub fn trim(&self) -> JsStr<'_> {
self.as_str().trim()
}
/// Returns a view trimmed at the start; forwards to `JsStr::trim_start`.
#[inline]
#[must_use]
pub fn trim_start(&self) -> JsStr<'_> {
self.as_str().trim_start()
}
/// Returns a view trimmed at the end; forwards to `JsStr::trim_end`.
#[inline]
#[must_use]
pub fn trim_end(&self) -> JsStr<'_> {
self.as_str().trim_end()
}
/// Looks up `index` in the string, returning `None` when the lookup
/// fails; forwards to `JsStr::get`.
#[inline]
#[must_use]
pub fn get<'a, I>(&'a self, index: I) -> Option<I::Value>
where
I: JsSliceIndex<'a>,
{
self.as_str().get(index)
}
/// Unchecked variant of `get`; forwards to `JsStr::get_unchecked`.
///
/// # Safety
///
/// The caller must uphold the safety contract of `JsStr::get_unchecked`
/// for `index`.
// NOTE(review): presumably `index` must be in bounds — confirm against `JsStr`.
#[inline]
#[must_use]
pub unsafe fn get_unchecked<'a, I>(&'a self, index: I) -> I::Value
where
I: JsSliceIndex<'a>,
{
// SAFETY: the caller guarantees the contract of `JsStr::get_unchecked`.
unsafe { self.as_str().get_unchecked(index) }
}
/// Variant of `get` that returns the value directly; forwards to
/// `JsStr::get_expect`.
// NOTE(review): presumably panics when `index` is invalid — confirm against `JsStr`.
#[inline]
#[must_use]
pub fn get_expect<'a, I>(&'a self, index: I) -> I::Value
where
I: JsSliceIndex<'a>,
{
self.as_str().get_expect(index)
}
/// Returns the `JsStrDisplayEscaped` adapter for this string.
#[inline]
#[must_use]
pub fn display_escaped(&self) -> JsStrDisplayEscaped<'_> {
self.as_str().display_escaped()
}
/// Returns the `JsStrDisplayLossy` adapter for this string.
#[inline]
#[must_use]
pub fn display_lossy(&self) -> JsStrDisplayLossy<'_> {
self.as_str().display_lossy()
}
/// Consumes the handle and returns its raw pointer without running the
/// destructor, so the reference count is left untouched.
#[inline]
#[must_use]
pub fn into_raw(self) -> NonNull<RawJsString> {
// `ManuallyDrop` suppresses `Drop`, which would otherwise decrement the count.
ManuallyDrop::new(self).ptr
}
/// Rebuilds a handle around `ptr` without touching the reference count.
///
/// # Safety
///
/// `ptr` must be a pointer previously returned by `into_raw` that has not
/// already been turned back into a `JsString`: dropping the result
/// decrements the reference count and may free the allocation.
#[inline]
#[must_use]
pub unsafe fn from_raw(ptr: NonNull<RawJsString>) -> Self {
Self { ptr }
}
}
static_assertions::const_assert!(align_of::<*const JsStr<'static>>() >= 2);
impl JsString {
/// Creates a `JsString` that refers to a `'static` `JsStr` without allocating.
///
/// The address of `src` is stored with 1 added to it, which sets the lowest
/// bit that `is_static` checks.
#[must_use]
pub const fn from_static_js_str(src: &'static JsStr<'static>) -> Self {
let src = ptr::from_ref(src);
// SAFETY: `src` was derived from a reference, so it is never null.
let ptr = unsafe { NonNull::new_unchecked(src.cast_mut()) };
// SAFETY: a `JsStr` wraps a slice (see `JsStr::latin1` / `JsStr::utf16`), so it
// spans more than one byte and the offset pointer stays inside the object.
let tagged_ptr = unsafe { ptr.byte_add(1) };
JsString {
ptr: tagged_ptr.cast::<RawJsString>(),
}
}
/// Returns `true` when the pointer carries the static tag in its lowest bit.
#[inline]
#[must_use]
pub fn is_static(&self) -> bool {
    let address = self.ptr.addr().get();
    (address & 1) == 1
}
/// Decodes the tagged pointer into its heap or static representation.
pub(crate) fn unwrap(&self) -> Unwrapped<'_> {
    if !self.is_static() {
        return Unwrapped::Heap(self.ptr);
    }

    // SAFETY: a static pointer was produced by adding 1 to the address of a
    // `JsStr`, so subtracting 1 yields that address again.
    let untagged = unsafe { self.ptr.byte_sub(1) }.cast::<JsStr<'static>>();

    // SAFETY: the untagged pointer originates from a `&'static JsStr<'static>`,
    // so it is aligned and valid for reads for as long as `self` lives.
    Unwrapped::Static(unsafe { untagged.as_ref() })
}
/// Borrows the contents of the string as a `JsStr`.
///
/// Static strings return a copy of the referenced `JsStr`; heap strings
/// build a slice over the data stored behind the header.
#[inline]
#[must_use]
pub fn as_str(&self) -> JsStr<'_> {
    let header = match self.unwrap() {
        Unwrapped::Static(js_str) => return *js_str,
        Unwrapped::Heap(heap) => heap.as_ptr(),
    };

    // SAFETY: a heap pointer always refers to a live `RawJsString` whose
    // `tagged_len` describes the initialized data stored right after the
    // header, in the encoding indicated by its flag.
    unsafe {
        let tag = (*header).tagged_len;
        let len = tag.len();
        let bytes = (&raw const (*header).data).cast::<u8>();

        if !tag.is_latin1() {
            #[allow(clippy::cast_ptr_alignment)]
            let units = bytes.cast::<u16>();
            return JsStr::utf16(std::slice::from_raw_parts(units, len));
        }
        JsStr::latin1(std::slice::from_raw_parts(bytes, len))
    }
}
/// Concatenates `x` and `y` into a new string; shorthand for
/// `concat_array(&[x, y])`.
#[inline]
#[must_use]
pub fn concat(x: JsStr<'_>, y: JsStr<'_>) -> Self {
    let parts = [x, y];
    Self::concat_array(&parts)
}
/// Concatenates all of `strings` into a single new string.
///
/// The result is Latin1-encoded only when every input is Latin1; otherwise
/// Latin1 inputs are widened to UTF-16 while copying. If the concatenated
/// value matches a static string, that static string is returned instead.
///
/// # Panics
///
/// Panics when the summed length of the inputs overflows `usize`.
#[inline]
#[must_use]
pub fn concat_array(strings: &[JsStr<'_>]) -> Self {
    // First pass: total length and common encoding.
    let mut latin1_encoding = true;
    let mut full_count = 0usize;
    for string in strings {
        let Some(sum) = full_count.checked_add(string.len()) else {
            alloc_overflow()
        };
        latin1_encoding &= string.is_latin1();
        full_count = sum;
    }

    let ptr = Self::allocate_inner(full_count, latin1_encoding);

    // SAFETY: `allocate_inner` returns a valid, initialized header; taking the
    // address of `data` does not read from it.
    let mut data = unsafe { (&raw mut (*ptr.as_ptr()).data).cast::<u8>() };

    // Second pass: copy every part behind the previous one.
    for &string in strings {
        // SAFETY: the allocation has room for `full_count` elements of the
        // chosen encoding, and the parts written so far plus this one never
        // exceed that. Sources are borrowed slices and cannot overlap the
        // fresh allocation. The data starts at the header size, which keeps
        // `u16` accesses aligned.
        unsafe {
            #[allow(clippy::cast_ptr_alignment)]
            match (latin1_encoding, string.variant()) {
                (true, JsStrVariant::Latin1(s)) => {
                    ptr::copy_nonoverlapping(s.as_ptr(), data, s.len());
                    data = data.add(s.len());
                }
                (false, JsStrVariant::Latin1(s)) => {
                    // Widen each Latin1 byte to one UTF-16 code unit.
                    let dst = data.cast::<u16>();
                    for (i, &byte) in s.iter().enumerate() {
                        dst.add(i).write(u16::from(byte));
                    }
                    data = dst.add(s.len()).cast::<u8>();
                }
                (false, JsStrVariant::Utf16(s)) => {
                    let dst = data.cast::<u16>();
                    ptr::copy_nonoverlapping(s.as_ptr(), dst, s.len());
                    data = dst.add(s.len()).cast::<u8>();
                }
                (true, JsStrVariant::Utf16(_)) => {
                    unreachable!("Already checked that it's latin1 encoding")
                }
            }
        }
    }

    let string = Self { ptr };
    StaticJsStrings::get_string(&string.as_str()).unwrap_or(string)
}
/// Infallible wrapper around `try_allocate_inner`.
///
/// A layout overflow is reported through `alloc_overflow`; an allocator
/// failure is handed to `std::alloc::handle_alloc_error`.
fn allocate_inner(str_len: usize, latin1: bool) -> NonNull<RawJsString> {
    Self::try_allocate_inner(str_len, latin1).unwrap_or_else(|failure| match failure {
        Some(layout) => std::alloc::handle_alloc_error(layout),
        None => alloc_overflow(),
    })
}
/// Allocates a header plus room for `str_len` elements (`u8` when `latin1`,
/// `u16` otherwise) and initializes the header with a reference count of 1.
/// The character data itself is left uninitialized.
///
/// # Errors
///
/// Returns `Err(None)` when the layout computation overflows, and
/// `Err(Some(layout))` when the allocator returns null for `layout`.
fn try_allocate_inner(
str_len: usize,
latin1: bool,
) -> Result<NonNull<RawJsString>, Option<Layout>> {
// Layout of the header followed by the element array, padded to its alignment.
let (layout, offset) = if latin1 {
Layout::array::<u8>(str_len)
} else {
Layout::array::<u16>(str_len)
}
.and_then(|arr| Layout::new::<RawJsString>().extend(arr))
.map(|(layout, offset)| (layout.pad_to_align(), offset))
.map_err(|_| None)?;
// The data must start right after the header.
debug_assert_eq!(offset, DATA_OFFSET);
// SAFETY: `layout` always includes the `RawJsString` header, so its size is non-zero.
#[allow(clippy::cast_ptr_alignment)]
let inner = unsafe { alloc(layout).cast::<RawJsString>() };
// A null pointer signals allocation failure; report the layout that failed.
let inner = NonNull::new(inner).ok_or(Some(layout))?;
// SAFETY: `inner` is non-null and was allocated with a layout that starts with a
// `RawJsString`, so it is valid and aligned for writing the header.
unsafe {
inner.as_ptr().write(RawJsString {
tagged_len: TaggedLen::new(str_len, latin1),
refcount: Cell::new(1),
data: [0; 0],
});
}
// Debug-only sanity check: the `data` field sits exactly `offset` bytes into the allocation.
debug_assert!({
let inner = inner.as_ptr();
// SAFETY: `inner` points at the header initialized above, and `offset` is within
// (or one past the end of) the same allocation.
unsafe {
ptr::eq(
inner.cast::<u8>().add(offset).cast(),
(*inner).data.as_mut_ptr(),
)
}
});
Ok(inner)
}
/// Allocates a new heap string holding a copy of `string`, keeping its
/// encoding, without first checking for a matching static string.
fn from_slice_skip_interning(string: JsStr<'_>) -> Self {
    let count = string.len();
    let ptr = Self::allocate_inner(count, string.is_latin1());

    // SAFETY: `allocate_inner` returns a valid, initialized header; taking the
    // address of `data` does not read from it.
    let dst = unsafe { (&raw mut (*ptr.as_ptr()).data).cast::<u8>() };

    // SAFETY: the allocation has room for `count` elements of the source's
    // encoding, the source is a borrowed slice that cannot overlap the fresh
    // allocation, and the data starts at the header size, which keeps `u16`
    // writes aligned.
    unsafe {
        #[allow(clippy::cast_ptr_alignment)]
        match string.variant() {
            JsStrVariant::Utf16(units) => {
                ptr::copy_nonoverlapping(units.as_ptr(), dst.cast::<u16>(), count);
            }
            JsStrVariant::Latin1(bytes) => {
                ptr::copy_nonoverlapping(bytes.as_ptr(), dst, count);
            }
        }
    }

    Self { ptr }
}
/// Creates a string from `string`, reusing a static string when
/// `StaticJsStrings::get_string` finds one and allocating otherwise.
fn from_slice(string: JsStr<'_>) -> Self {
    match StaticJsStrings::get_string(&string) {
        Some(interned) => interned,
        None => Self::from_slice_skip_interning(string),
    }
}
/// Returns the current reference count of a heap string, or `None` for a
/// static string (which is not reference counted).
#[inline]
#[must_use]
pub fn refcount(&self) -> Option<usize> {
    (!self.is_static()).then(|| {
        // SAFETY: a non-static pointer always refers to a live `RawJsString`.
        unsafe { self.ptr.as_ref().refcount.get() }
    })
}
}
impl Clone for JsString {
    /// Creates another handle to the same string.
    ///
    /// Static strings just copy the pointer. Heap strings increment the
    /// reference count, and the process aborts if the count wraps to zero.
    #[inline]
    fn clone(&self) -> Self {
        if !self.is_static() {
            // SAFETY: a non-static pointer always refers to a live `RawJsString`.
            let header = unsafe { self.ptr.as_ref() };
            let bumped = header.refcount.get().wrapping_add(1);
            if bumped == 0 {
                abort();
            }
            header.refcount.set(bumped);
        }
        Self { ptr: self.ptr }
    }
}
impl Default for JsString {
/// Returns `StaticJsStrings::EMPTY_STRING`.
#[inline]
fn default() -> Self {
StaticJsStrings::EMPTY_STRING
}
}
impl Drop for JsString {
    /// Releases this handle.
    ///
    /// Static strings are left alone. Heap strings decrement the reference
    /// count and free the allocation once it reaches zero.
    #[inline]
    fn drop(&mut self) {
        if self.is_static() {
            return;
        }

        // SAFETY: a non-static pointer always refers to a live `RawJsString`.
        let inner = unsafe { self.ptr.as_ref() };

        let remaining = inner.refcount.get() - 1;
        inner.refcount.set(remaining);
        if remaining != 0 {
            return;
        }

        // Recompute the layout the string was allocated with.
        let payload = if inner.is_latin1() {
            Layout::array::<u8>(inner.len())
        } else {
            Layout::array::<u16>(inner.len())
        };

        // SAFETY: the same layout computation succeeded when the string was
        // allocated, so neither step can fail here.
        let layout = unsafe {
            Layout::for_value(inner)
                .extend(payload.unwrap_unchecked())
                .unwrap_unchecked()
                .0
                .pad_to_align()
        };

        // SAFETY: this was the last handle, and `layout` matches the layout
        // used for the allocation.
        unsafe {
            dealloc(self.ptr.cast::<u8>().as_ptr(), layout);
        }
    }
}
impl std::fmt::Debug for JsString {
/// Formats the string by forwarding to `fmt` on the borrowed `JsStr`.
#[inline]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.as_str().fmt(f)
}
}
// Marker impl: equality is defined by the `PartialEq` impl, which compares the
// borrowed `JsStr` views.
impl Eq for JsString {}
// Generates `impl From<$ty> for JsString` for every listed numeric type.
//
// Each group has the form `module => ty, ty, ...`: the value is formatted with
// `module::Buffer::new().format(value)` and the resulting bytes are copied into
// a new Latin1 string, skipping the static-string lookup.
macro_rules! impl_from_number_for_js_string {
($($module: ident => $($ty:ty),+)+) => {
$(
$(
impl From<$ty> for JsString {
#[inline]
fn from(value: $ty) -> Self {
JsString::from_slice_skip_interning(JsStr::latin1(
$module::Buffer::new().format(value).as_bytes(),
))
}
}
)+
)+
};
}
// Integers are formatted through `itoa`, floating-point numbers through `ryu_js`.
impl_from_number_for_js_string!(
itoa => i8, i16, i32, i64, i128, u8, u16, u32, u64, u128, isize, usize
ryu_js => f32, f64
);
impl From<&[u16]> for JsString {
    /// Creates a string from UTF-16 code units, reusing a static string when
    /// one matches.
    #[inline]
    fn from(s: &[u16]) -> Self {
        let view = JsStr::utf16(s);
        Self::from_slice(view)
    }
}
impl From<&str> for JsString {
    /// Creates a string from a `&str`.
    ///
    /// ASCII input is stored as Latin1, reusing a static string when one
    /// matches. Any other input is re-encoded as UTF-16 and always allocated.
    #[inline]
    fn from(s: &str) -> Self {
        if !s.is_ascii() {
            let units: Vec<u16> = s.encode_utf16().collect();
            return JsString::from_slice_skip_interning(JsStr::utf16(&units[..]));
        }

        let js_str = JsStr::latin1(s.as_bytes());
        match StaticJsStrings::get_string(&js_str) {
            Some(interned) => interned,
            None => JsString::from_slice_skip_interning(js_str),
        }
    }
}
impl From<JsStr<'_>> for JsString {
    /// Creates an owned string from a borrowed `JsStr`, reusing a static
    /// string when one matches.
    #[inline]
    fn from(value: JsStr<'_>) -> Self {
        match StaticJsStrings::get_string(&value) {
            Some(interned) => interned,
            None => JsString::from_slice_skip_interning(value),
        }
    }
}
impl From<&[JsString]> for JsString {
#[inline]
fn from(value: &[JsString]) -> Self {
Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])
}
}
impl<const N: usize> From<&[JsString; N]> for JsString {
#[inline]
fn from(value: &[JsString; N]) -> Self {
Self::concat_array(&value.iter().map(Self::as_str).collect::<Vec<_>>()[..])
}
}
impl From<String> for JsString {
    /// Creates a string from an owned `String` via the `&str` conversion.
    #[inline]
    fn from(s: String) -> Self {
        let text: &str = &s;
        Self::from(text)
    }
}
impl<'a> From<Cow<'a, str>> for JsString {
    /// Creates a string from borrowed or owned text; both cases go through
    /// the `&str` conversion.
    #[inline]
    fn from(s: Cow<'a, str>) -> Self {
        let text: &str = &s;
        Self::from(text)
    }
}
impl<const N: usize> From<&[u16; N]> for JsString {
    /// Creates a string from a fixed-size array of UTF-16 code units.
    #[inline]
    fn from(s: &[u16; N]) -> Self {
        Self::from(s.as_slice())
    }
}
impl Hash for JsString {
    /// Hashes the borrowed `JsStr` view, so a `JsString` hashes like its `JsStr`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.as_str(), state);
    }
}
impl PartialOrd for JsStr<'_> {
    /// Always returns `Some`, using the total order given by `Ord`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for JsString {
    /// Orders two strings by comparing their borrowed `JsStr` views.
    #[inline]
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        Ord::cmp(&self.as_str(), &other.as_str())
    }
}
impl PartialEq for JsString {
    /// Two strings are equal when their borrowed `JsStr` views are equal.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.as_str(), other.as_str());
        lhs == rhs
    }
}
impl PartialEq<JsString> for [u16] {
    /// Equal when both sides have the same length and every pair of code
    /// units matches.
    #[inline]
    fn eq(&self, other: &JsString) -> bool {
        self.len() == other.len()
            && self
                .iter()
                .copied()
                .zip(other.iter())
                .all(|(mine, theirs)| mine == theirs)
    }
}
impl<const N: usize> PartialEq<JsString> for [u16; N] {
    /// Compares the array as a slice, using the `[u16]` implementation.
    #[inline]
    fn eq(&self, other: &JsString) -> bool {
        <[u16] as PartialEq<JsString>>::eq(self.as_slice(), other)
    }
}
impl PartialEq<[u16]> for JsString {
#[inline]
fn eq(&self, other: &[u16]) -> bool {
other == self
}
}
impl<const N: usize> PartialEq<[u16; N]> for JsString {
#[inline]
fn eq(&self, other: &[u16; N]) -> bool {
*self == other[..]
}
}
impl PartialEq<str> for JsString {
    /// Compares the borrowed `JsStr` view with `other`.
    #[inline]
    fn eq(&self, other: &str) -> bool {
        let view = self.as_str();
        view == other
    }
}
impl PartialEq<&str> for JsString {
    /// Compares the borrowed `JsStr` view with the referenced `&str`.
    #[inline]
    fn eq(&self, other: &&str) -> bool {
        let text: &str = *other;
        let view = self.as_str();
        view == text
    }
}
impl PartialEq<JsString> for str {
    /// Mirror of `JsString == str`.
    #[inline]
    fn eq(&self, other: &JsString) -> bool {
        <JsString as PartialEq<str>>::eq(other, self)
    }
}
impl PartialEq<JsStr<'_>> for JsString {
    /// Compares the borrowed `JsStr` view with `other`.
    #[inline]
    fn eq(&self, other: &JsStr<'_>) -> bool {
        let view = self.as_str();
        view == *other
    }
}
impl PartialEq<JsString> for JsStr<'_> {
    /// Mirror of `JsString == JsStr`.
    #[inline]
    fn eq(&self, other: &JsString) -> bool {
        PartialEq::eq(other, self)
    }
}
impl PartialOrd for JsString {
    /// Always returns `Some`, using the total order given by `Ord`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl FromStr for JsString {
    type Err = Infallible;

    /// Parses a string via the `&str` conversion; this never fails.
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(s.into())
    }
}