use crate::{DATA_OFFSET, JsStr, JsStrVariant, JsString, RawJsString, TaggedLen, alloc_overflow};
use std::{
alloc::{Layout, alloc, dealloc, realloc},
cell::Cell,
marker::PhantomData,
ops::{Add, AddAssign},
ptr::{self, NonNull},
str::{self},
};
/// A growable buffer of code units of type `D` that can be turned into a [`JsString`].
///
/// The buffer is one heap allocation: a `RawJsString` header followed by the elements. When
/// the builder is finished, that allocation is handed to the resulting [`JsString`].
#[derive(Debug)]
pub struct JsStringBuilder<D: Copy> {
// Number of `D` elements the current allocation can hold.
cap: usize,
// Number of `D` elements written so far.
len: usize,
// Pointer to the header of the allocation; `NonNull::dangling()` while nothing is allocated.
inner: NonNull<RawJsString>,
// Ties the element type `D` to the builder without storing a value of it.
phantom_data: PhantomData<D>,
}
/// The default builder is the empty, unallocated builder returned by [`JsStringBuilder::new`].
impl<D: Copy> Default for JsStringBuilder<D> {
fn default() -> Self {
Self::new()
}
}
impl<D: Copy> JsStringBuilder<D> {
/// Size in bytes of one element of type `D`.
const DATA_SIZE: usize = size_of::<D>();
/// Lower bound (in elements) applied by `allocate` to every growth request: as many elements
/// as fit in 8 bytes.
///
/// NOTE(review): this divides by zero for a zero-sized `D`; the aliases in this file only use
/// `u8` and `u16`.
const MIN_NON_ZERO_CAP: usize = 8 / Self::DATA_SIZE;
/// Creates an empty builder without allocating.
///
/// `inner` starts as a dangling placeholder; `is_allocated` uses that value to tell whether a
/// real buffer exists.
#[inline]
#[must_use]
pub const fn new() -> Self {
Self {
cap: 0,
len: 0,
inner: NonNull::dangling(),
phantom_data: PhantomData,
}
}
/// Returns the number of elements currently stored in the builder.
#[inline]
#[must_use]
pub const fn len(&self) -> usize {
self.len
}
/// Forces the length of the builder to `new_len` without touching the buffer.
///
/// # Safety
///
/// - `new_len` must not exceed [`capacity`](Self::capacity); this is only checked by a
///   `debug_assert!`.
/// - The first `new_len` elements must be initialized, because `as_slice` exposes them.
#[inline]
pub const unsafe fn set_len(&mut self, new_len: usize) {
debug_assert!(new_len <= self.capacity());
self.len = new_len;
}
/// Returns the number of elements the current buffer can hold before it has to grow.
#[inline]
#[must_use]
pub const fn capacity(&self) -> usize {
self.cap
}
/// Returns the number of bytes occupied by the elements written so far
/// (`len() * size_of::<D>()`).
#[must_use]
const fn allocated_data_byte_len(&self) -> usize {
self.len() * Self::DATA_SIZE
}
/// Computes how many `D` elements fit in an allocation described by `layout`, i.e. in the
/// bytes that follow the first `DATA_OFFSET` bytes.
///
/// `layout` is expected to come from `new_layout`, so its size is at least `DATA_OFFSET`.
#[must_use]
const fn capacity_from_layout(layout: Layout) -> usize {
(layout.size() - DATA_OFFSET) / Self::DATA_SIZE
}
#[inline]
#[must_use]
pub fn with_capacity(cap: usize) -> Self {
if cap == 0 {
return Self::new();
}
let layout = Self::new_layout(cap);
#[allow(clippy::cast_ptr_alignment)]
let ptr = unsafe { alloc(layout) };
let Some(ptr) = NonNull::new(ptr.cast()) else {
std::alloc::handle_alloc_error(layout)
};
Self {
cap: Self::capacity_from_layout(layout),
len: 0,
inner: ptr,
phantom_data: PhantomData,
}
}
/// Returns `true` once `inner` points to a real buffer, i.e. it no longer equals the dangling
/// placeholder installed by `new`.
#[must_use]
fn is_allocated(&self) -> bool {
self.inner != NonNull::dangling()
}
#[must_use]
unsafe fn current_layout(&self) -> Layout {
unsafe {
Layout::for_value(self.inner.as_ref())
.extend(Layout::array::<D>(self.capacity()).unwrap_unchecked())
.unwrap_unchecked()
.0
.pad_to_align()
}
}
/// Returns a pointer to the first element slot, i.e. the address of the header's `data` field
/// reinterpreted as `*mut D`.
///
/// # Safety
///
/// `inner` must point to a live buffer; the field address is computed from it.
#[must_use]
const unsafe fn data(&self) -> *mut D {
unsafe { (&raw mut (*self.inner.as_ptr()).data).cast() }
}
/// Grows the buffer when it cannot hold `required_cap` elements; otherwise does nothing.
#[allow(clippy::inline_always)]
#[inline(always)]
fn allocate_if_needed(&mut self, required_cap: usize) {
    if self.capacity() >= required_cap {
        return;
    }
    self.allocate(required_cap);
}
/// Allocates a buffer matching `new_layout`, or resizes the existing one to it, then updates
/// `inner` and `cap`.
///
/// Calls `std::alloc::handle_alloc_error` if the allocator returns a null pointer.
#[allow(clippy::cast_ptr_alignment)]
fn allocate_inner(&mut self, new_layout: Layout) {
let new_ptr = if self.is_allocated() {
// Existing buffer: resize it, describing the old block with the layout rebuilt from the
// current capacity.
let old_ptr = self.inner.as_ptr();
let old_layout = unsafe { self.current_layout() };
unsafe { realloc(old_ptr.cast(), old_layout, new_layout.size()) }
} else {
// No buffer yet: allocate a fresh one.
unsafe { alloc(new_layout) }
};
let Some(new_ptr) = NonNull::new(new_ptr.cast::<RawJsString>()) else {
std::alloc::handle_alloc_error(new_layout)
};
self.inner = new_ptr;
// The capacity is derived from the (possibly padded) layout size, not from the request.
self.cap = Self::capacity_from_layout(new_layout);
}
/// Appends one element to the end of the builder, growing the buffer first if it is full.
#[inline]
pub fn push(&mut self, v: D) {
    self.allocate_if_needed(self.len() + 1);
    // SAFETY: the call above guarantees room for at least one more element.
    unsafe { self.push_unchecked(v) };
}
/// Copies every element of `v` to the end of the builder without checking the capacity.
///
/// An empty `v` is a no-op and never touches the buffer pointer.
///
/// # Safety
///
/// The caller must ensure that the capacity is at least `len() + v.len()`.
#[inline]
pub const unsafe fn extend_from_slice_unchecked(&mut self, v: &[D]) {
    // An empty slice satisfies the capacity contract even on an unallocated builder, where
    // `inner` is still the dangling placeholder. Return before deriving a data pointer from
    // it: offsetting a dangling pointer to the `data` field is not a valid projection.
    if v.is_empty() {
        return;
    }
    // SAFETY: `v` is non-empty and the caller guarantees enough capacity, so the builder is
    // allocated and the destination range lies inside its buffer. `v` is borrowed immutably
    // while `self` is borrowed mutably, so the ranges cannot overlap.
    unsafe {
        ptr::copy_nonoverlapping(v.as_ptr(), self.data().add(self.len()), v.len());
    }
    self.len += v.len();
}
/// Appends a copy of every element of `v`, growing the buffer first when it is too small.
#[inline]
pub fn extend_from_slice(&mut self, v: &[D]) {
    self.allocate_if_needed(self.len() + v.len());
    // SAFETY: the call above guarantees the capacity is at least `len() + v.len()`.
    unsafe { self.extend_from_slice_unchecked(v) };
}
/// Computes the layout of a buffer made of the `RawJsString` header followed by `cap`
/// elements of `D`, padded to its alignment.
///
/// Calls `alloc_overflow` when such a layout cannot be represented.
fn new_layout(cap: usize) -> Layout {
    let combined = Layout::array::<D>(cap)
        .and_then(|elements| Layout::new::<RawJsString>().extend(elements));
    let Ok((layout, offset)) = combined else {
        alloc_overflow()
    };
    // The elements must start exactly where the rest of the crate expects them.
    debug_assert_eq!(offset, DATA_OFFSET);
    layout.pad_to_align()
}
/// Ensures there is room for at least `additional` more elements.
///
/// Growth goes through `allocate`, so the buffer may end up larger than requested. Calls
/// `alloc_overflow` if `len() + additional` overflows `usize`.
#[inline]
pub fn reserve(&mut self, additional: usize) {
    let spare = self.capacity().wrapping_sub(self.len);
    if additional <= spare {
        return;
    }
    match self.len().checked_add(additional) {
        Some(cap) => self.allocate(cap),
        None => alloc_overflow(),
    }
}
/// Ensures there is room for at least `additional` more elements, requesting a layout for
/// exactly `len() + additional` elements instead of applying the doubling policy.
///
/// Calls `alloc_overflow` if `len() + additional` overflows `usize`.
#[inline]
pub fn reserve_exact(&mut self, additional: usize) {
    let spare = self.capacity().wrapping_sub(self.len);
    if additional <= spare {
        return;
    }
    match self.len().checked_add(additional) {
        Some(cap) => self.allocate_inner(Self::new_layout(cap)),
        None => alloc_overflow(),
    }
}
/// Grows the buffer to hold at least `cap` elements.
///
/// The requested size is the largest of `cap`, twice the current capacity and
/// `MIN_NON_ZERO_CAP`.
fn allocate(&mut self, cap: usize) {
    let target = cap
        .max(self.capacity() * 2)
        .max(Self::MIN_NON_ZERO_CAP);
    self.allocate_inner(Self::new_layout(target));
}
/// Writes `v` into the slot just past the last element and increments the length, without
/// checking the capacity.
///
/// # Safety
///
/// The caller must ensure that the capacity is greater than `len()`.
#[inline]
pub const unsafe fn push_unchecked(&mut self, v: D) {
unsafe {
self.data().add(self.len()).write(v);
self.len += 1;
}
}
/// Returns `true` if no element has been written to the builder.
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
#[must_use]
pub fn is_ascii(&self) -> bool {
let data = unsafe {
std::slice::from_raw_parts(self.data().cast::<u8>(), self.allocated_data_byte_len())
};
data.is_ascii()
}
/// Returns the elements written so far as a slice; an unallocated builder yields an empty
/// slice.
#[inline]
#[must_use]
pub fn as_slice(&self) -> &[D] {
    if !self.is_allocated() {
        return &[];
    }
    // SAFETY: the builder is allocated and its first `len()` elements have been written.
    unsafe { std::slice::from_raw_parts(self.data(), self.len()) }
}
/// Returns the elements written so far as a mutable slice; an unallocated builder yields an
/// empty slice.
///
/// # Safety
///
/// NOTE(review): presumably the caller must leave the contents valid for the encoding this
/// builder is later built with — confirm against the crate documentation.
#[inline]
#[must_use]
pub unsafe fn as_mut_slice(&mut self) -> &mut [D] {
    if !self.is_allocated() {
        return &mut [];
    }
    // SAFETY: the builder is allocated and its first `len()` elements have been written.
    unsafe { std::slice::from_raw_parts_mut(self.data(), self.len()) }
}
/// Consumes the builder and turns its buffer into a [`JsString`].
///
/// `latin1` is forwarded to `TaggedLen::new` together with the length. An empty builder
/// returns `JsString::default()` instead.
#[inline]
#[must_use]
fn build_inner(mut self, latin1: bool) -> JsString {
if self.is_empty() {
// `self` is dropped normally here, which frees the buffer if one was allocated.
return JsString::default();
}
let len = self.len();
if len != self.capacity() {
// Resize the buffer to the layout computed for exactly `len` elements.
let layout = Self::new_layout(self.len());
self.allocate_inner(layout);
}
let inner = self.inner;
// Initialize the header in place; the elements already sit right after it.
unsafe {
inner.as_ptr().write(RawJsString {
tagged_len: TaggedLen::new(len, latin1),
refcount: Cell::new(1),
data: [0; 0],
});
}
// The buffer is now owned by the returned string, so the builder's `Drop` must not free it.
std::mem::forget(self);
JsString { ptr: inner }
}
}
/// Frees the buffer of a builder that was never turned into a string.
impl<D: Copy> Drop for JsStringBuilder<D> {
    #[cold]
    #[inline]
    fn drop(&mut self) {
        // Nothing to free while `inner` is still the dangling placeholder.
        if !self.is_allocated() {
            return;
        }
        // SAFETY: the builder is allocated, so its layout can be rebuilt from the capacity.
        let layout = unsafe { self.current_layout() };
        // SAFETY: `inner` was obtained from the global allocator with this layout.
        unsafe { dealloc(self.inner.as_ptr().cast(), layout) };
    }
}
/// `builder += &other` appends a copy of every element of `other`.
impl<D: Copy> AddAssign<&JsStringBuilder<D>> for JsStringBuilder<D> {
#[inline]
fn add_assign(&mut self, rhs: &JsStringBuilder<D>) {
self.extend_from_slice(rhs.as_slice());
}
}
/// `builder += slice` appends a copy of every element of `slice`.
impl<D: Copy> AddAssign<&[D]> for JsStringBuilder<D> {
#[inline]
fn add_assign(&mut self, rhs: &[D]) {
self.extend_from_slice(rhs);
}
}
/// `builder + &other` consumes `builder`, appends a copy of `other`'s elements and returns it.
impl<D: Copy> Add<&JsStringBuilder<D>> for JsStringBuilder<D> {
type Output = Self;
#[inline]
fn add(mut self, rhs: &JsStringBuilder<D>) -> Self::Output {
self.extend_from_slice(rhs.as_slice());
self
}
}
/// `builder + slice` consumes `builder`, appends a copy of `slice`'s elements and returns it.
impl<D: Copy> Add<&[D]> for JsStringBuilder<D> {
type Output = Self;
#[inline]
fn add(mut self, rhs: &[D]) -> Self::Output {
self.extend_from_slice(rhs);
self
}
}
/// Appends every element produced by an iterator.
impl<D: Copy> Extend<D> for JsStringBuilder<D> {
    /// Reserves room for the iterator's lower size bound up front, then pushes each element.
    #[inline]
    fn extend<I: IntoIterator<Item = D>>(&mut self, iter: I) {
        let iterator = iter.into_iter();
        let (lower_bound, _) = iterator.size_hint();
        // `reserve` applies the same growth policy as a plain capacity check, but computes
        // `len() + lower_bound` with checked arithmetic. An iterator reporting a huge lower
        // bound therefore ends in `alloc_overflow` rather than an unchecked addition.
        self.reserve(lower_bound);
        iterator.for_each(|c| self.push(c));
    }
}
/// Collects an iterator of elements into a new builder.
impl<D: Copy> FromIterator<D> for JsStringBuilder<D> {
#[inline]
fn from_iter<T: IntoIterator<Item = D>>(iter: T) -> Self {
// Start empty and let `extend` handle reservation and insertion.
let mut builder = Self::new();
builder.extend(iter);
builder
}
}
/// Creates a builder holding a copy of the elements of a slice.
impl<D: Copy> From<&[D]> for JsStringBuilder<D> {
#[inline]
fn from(value: &[D]) -> Self {
// Size the buffer for the whole slice up front so the copy needs no capacity check.
let mut builder = Self::with_capacity(value.len());
// SAFETY-relevant: relies on `with_capacity` providing room for `value.len()` elements.
unsafe { builder.extend_from_slice_unchecked(value) };
builder
}
}
/// Two builders are equal when they hold the same elements; capacities are not compared.
impl<D: Copy + Eq + PartialEq> PartialEq for JsStringBuilder<D> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.as_slice() == other.as_slice()
    }
}
/// Cloning copies the written elements into a separate buffer.
impl<D: Copy> Clone for JsStringBuilder<D> {
    /// Returns a builder with the same elements, created with the same requested capacity.
    /// An unallocated builder clones to an unallocated builder.
    #[inline]
    fn clone(&self) -> Self {
        if !self.is_allocated() {
            return Self::new();
        }
        let mut copy = Self::with_capacity(self.capacity());
        // SAFETY: `copy` has room for `capacity()` elements, which is at least `len()`.
        unsafe { copy.extend_from_slice_unchecked(self.as_slice()) };
        copy
    }

    /// Overwrites `self` with the elements of `source`, reusing the existing buffer when it
    /// is large enough.
    #[inline]
    fn clone_from(&mut self, source: &Self) {
        let source_len = source.len();

        if source_len > self.capacity() {
            self.allocate(source_len);
        } else if source_len == 0 {
            // Nothing to copy; just forget the current contents.
            // SAFETY: zero never exceeds the capacity.
            unsafe { self.set_len(0) };
            return;
        }

        // From here on `source_len` is non-zero and fits in `self`, so both builders are
        // allocated.
        // SAFETY: both buffers are live, distinct (`&mut self` vs `&source`) and hold at
        // least `source_len` elements.
        unsafe {
            let dst = self.data();
            let src = source.data();
            ptr::copy_nonoverlapping(src, dst, source_len);
            self.set_len(source_len);
        }
    }
}
/// A [`JsStringBuilder`] whose elements are single bytes (`u8`).
pub type Latin1JsStringBuilder = JsStringBuilder<u8>;
impl Latin1JsStringBuilder {
    /// Builds a [`JsString`] if every stored byte is ASCII; otherwise returns `None` and the
    /// builder is dropped.
    #[inline]
    #[must_use]
    pub fn build(self) -> Option<JsString> {
        if !self.is_ascii() {
            return None;
        }
        Some(self.build_inner(true))
    }

    /// Builds a [`JsString`] flagged as Latin-1 without inspecting the stored bytes.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably the caller must guarantee that the bytes really are Latin-1
    /// encoded text — confirm against the crate documentation.
    #[inline]
    #[must_use]
    pub unsafe fn build_as_latin1(self) -> JsString {
        self.build_inner(true)
    }
}
/// A [`JsStringBuilder`] whose elements are 16-bit code units (`u16`).
pub type Utf16JsStringBuilder = JsStringBuilder<u16>;
impl Utf16JsStringBuilder {
/// Builds a [`JsString`] from the stored code units, with the Latin-1 flag cleared.
#[inline]
#[must_use]
pub fn build(self) -> JsString {
self.build_inner(false)
}
}
/// One piece of text queued in a [`CommonJsStringBuilder`].
#[derive(Clone, Debug)]
pub enum Segment<'a> {
/// An owned string.
String(JsString),
/// A borrowed string slice.
Str(JsStr<'a>),
/// A single byte, treated as the code point with the same value.
Latin1(u8),
/// A single Unicode scalar value.
CodePoint(char),
}
impl Segment<'_> {
#[inline]
#[must_use]
fn is_ascii(&self) -> bool {
match self {
Segment::String(s) => s.as_str().is_latin1(),
Segment::Str(s) => s.is_latin1(),
Segment::Latin1(b) => *b <= 0x7f,
Segment::CodePoint(ch) => *ch as u32 <= 0x7F,
}
}
}
/// Wraps an owned [`JsString`] in [`Segment::String`].
impl From<JsString> for Segment<'_> {
#[inline]
fn from(value: JsString) -> Self {
Self::String(value)
}
}
/// Converts a `String` into a [`JsString`] and wraps it in [`Segment::String`].
impl From<String> for Segment<'_> {
#[inline]
fn from(value: String) -> Self {
Self::String(value.into())
}
}
/// Converts a slice of 16-bit code units into a [`JsString`] and wraps it in
/// [`Segment::String`].
impl From<&[u16]> for Segment<'_> {
#[inline]
fn from(value: &[u16]) -> Self {
Self::String(value.into())
}
}
/// Converts a `&str` into an owned [`JsString`] and wraps it in [`Segment::String`].
impl From<&str> for Segment<'_> {
#[inline]
fn from(value: &str) -> Self {
Self::String(value.into())
}
}
/// Wraps a borrowed [`JsStr`] in [`Segment::Str`]; the borrow must outlive the segment.
impl<'seg, 'ref_str: 'seg> From<JsStr<'ref_str>> for Segment<'seg> {
#[inline]
fn from(value: JsStr<'ref_str>) -> Self {
Self::Str(value)
}
}
/// Wraps a single byte in [`Segment::Latin1`].
impl From<u8> for Segment<'_> {
#[inline]
fn from(value: u8) -> Self {
Self::Latin1(value)
}
}
/// Wraps a single `char` in [`Segment::CodePoint`].
impl From<char> for Segment<'_> {
#[inline]
fn from(value: char) -> Self {
Self::CodePoint(value)
}
}
/// A builder that queues heterogeneous [`Segment`]s and picks the output encoding only when
/// the string is built.
#[derive(Clone, Debug, Default)]
pub struct CommonJsStringBuilder<'a> {
// Segments in the order they were pushed; nothing is encoded until a `build*` call.
segments: Vec<Segment<'a>>,
}
impl<'seg, 'ref_str: 'seg> CommonJsStringBuilder<'seg> {
/// Creates an empty builder with no queued segments.
#[inline]
#[must_use]
pub const fn new() -> Self {
Self {
segments: Vec::new(),
}
}
/// Creates an empty builder with room for at least `capacity` segments.
///
/// The capacity counts segments, not code units.
#[inline]
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Self {
segments: Vec::with_capacity(capacity),
}
}
/// Reserves room for at least `additional` more segments (forwards to `Vec::reserve`).
#[inline]
pub fn reserve(&mut self, additional: usize) {
self.segments.reserve(additional);
}
/// Reserves room for `additional` more segments without deliberate over-allocation
/// (forwards to `Vec::reserve_exact`).
#[inline]
pub fn reserve_exact(&mut self, additional: usize) {
self.segments.reserve_exact(additional);
}
/// Queues `seg` at the end of the builder, converting it into a [`Segment`] first.
#[inline]
pub fn push<T: Into<Segment<'ref_str>>>(&mut self, seg: T) {
self.segments.push(seg.into());
}
/// Returns `true` if every queued segment passes `Segment::is_ascii`.
///
/// An empty builder returns `true`.
#[inline]
#[must_use]
pub fn is_ascii(&self) -> bool {
self.segments.iter().all(Segment::is_ascii)
}
/// Returns the number of queued segments (not the number of code units).
#[inline]
#[must_use]
pub fn len(&self) -> usize {
self.segments.len()
}
/// Returns `true` if no segment has been queued.
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
    self.segments.is_empty()
}
/// Tries to build a Latin-1 flagged [`JsString`] from the queued segments without consuming
/// the builder.
///
/// Returns `None` as soon as a string segment is not Latin-1 encoded, a byte segment is
/// above `0x7F`, or a code point does not fit in a byte. The final result also comes from
/// `Latin1JsStringBuilder::build`, which returns `None` unless every collected byte is ASCII.
#[inline]
#[must_use]
#[allow(clippy::cast_lossless)]
pub fn build_from_latin1(&self) -> Option<JsString> {
    let mut builder = Latin1JsStringBuilder::new();
    for seg in &self.segments {
        match seg {
            Segment::String(s) => builder.extend_from_slice(s.as_str().as_latin1()?),
            Segment::Str(s) => builder.extend_from_slice(s.as_latin1()?),
            Segment::Latin1(b) => {
                if *b > 0x7f {
                    return None;
                }
                builder.push(*b);
            }
            Segment::CodePoint(ch) => builder.push(u8::try_from(*ch as u32).ok()?),
        }
    }
    builder.build()
}
#[inline]
#[must_use]
#[allow(clippy::cast_possible_truncation)]
pub fn build_from_utf16(self) -> JsString {
let mut builder = Utf16JsStringBuilder::new();
for seg in self.segments {
match seg {
Segment::String(s) => {
let js_str = s.as_str();
match js_str.variant() {
JsStrVariant::Latin1(s) => builder.extend(s.iter().copied().map(u16::from)),
JsStrVariant::Utf16(s) => builder.extend_from_slice(s),
}
}
Segment::Str(s) => match s.variant() {
JsStrVariant::Latin1(s) => builder.extend(s.iter().copied().map(u16::from)),
JsStrVariant::Utf16(s) => builder.extend_from_slice(s),
},
Segment::Latin1(latin1) => builder.push(u16::from(latin1)),
Segment::CodePoint(code_point) => {
builder.extend_from_slice(code_point.encode_utf16(&mut [0_u16; 2]));
}
}
}
builder.build()
}
/// Consumes the builder and produces a [`JsString`], choosing the encoding automatically.
///
/// With no segments the default string is returned. If every segment passes
/// [`is_ascii`](Self::is_ascii) the Latin-1 path is used, otherwise the UTF-16 path.
#[inline]
#[must_use]
pub fn build(self) -> JsString {
    if self.is_empty() {
        return JsString::default();
    }
    if !self.is_ascii() {
        return self.build_from_utf16();
    }
    // SAFETY: every segment passed `is_ascii`, so string segments are Latin-1 encoded and
    // single bytes / code points are at most `0x7F`.
    unsafe { self.build_as_latin1() }
}
/// Consumes the builder and produces a Latin-1 flagged [`JsString`] without validating the
/// segments first.
///
/// # Panics
///
/// Hits `unreachable!` if a string segment is not Latin-1 encoded.
///
/// # Safety
///
/// The caller must ensure that every segment is representable in Latin-1: code points are
/// narrowed with an `as u8` cast, so any value above `0xFF` is silently truncated.
#[inline]
#[must_use]
pub unsafe fn build_as_latin1(self) -> JsString {
let mut builder = Latin1JsStringBuilder::new();
for seg in self.segments {
match seg {
Segment::String(s) => {
let js_str = s.as_str();
let Some(s) = js_str.as_latin1() else {
unreachable!("string segment shoud be latin1")
};
builder.extend_from_slice(s);
}
Segment::Str(s) => {
let Some(s) = s.as_latin1() else {
unreachable!("string segment shoud be latin1")
};
builder.extend_from_slice(s);
}
Segment::Latin1(latin1) => builder.push(latin1),
// Truncating cast: only the low byte of the code point is kept.
Segment::CodePoint(code_point) => builder.push(code_point as u8),
}
}
// The bytes are not re-checked here; the Latin-1 flag is applied as-is.
unsafe { builder.build_as_latin1() }
}
}
/// `builder += value` queues `value` as a new segment.
impl<'ref_str, T: Into<Segment<'ref_str>>> AddAssign<T> for CommonJsStringBuilder<'ref_str> {
#[inline]
fn add_assign(&mut self, rhs: T) {
self.push(rhs);
}
}
/// `builder + value` consumes the builder, queues `value` as a new segment and returns it.
impl<'ref_str, T: Into<Segment<'ref_str>>> Add<T> for CommonJsStringBuilder<'ref_str> {
type Output = Self;
#[inline]
fn add(mut self, rhs: T) -> Self::Output {
self.push(rhs);
self
}
}