use crate::{
decode_utf16_surrogate_pair,
error::{Utf16Error, Utf32Error},
is_utf16_low_surrogate, is_utf16_surrogate, validate_utf16, validate_utf16_vec, validate_utf32,
validate_utf32_vec, Utf16Str, Utf32Str,
};
#[allow(unused_imports)]
use alloc::{
borrow::{Cow, ToOwned},
boxed::Box,
string::String,
vec::Vec,
};
#[allow(unused_imports)]
use core::{
borrow::{Borrow, BorrowMut},
convert::{AsMut, AsRef, From, Infallible, TryFrom},
fmt::Write,
iter::FromIterator,
mem,
ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut, RangeBounds},
ptr,
slice::SliceIndex,
str::FromStr,
};
mod iter;
pub use iter::*;
// Generates an owned UTF string type together with every item that is shared between
// `Utf16String` and `Utf32String`: the struct itself, its encoding-independent inherent
// methods, and the trait implementations (conversions, comparisons, `Extend`,
// `FromIterator`, formatting, indexing).
//
// Encoding-specific methods such as `push` and `from_ustring` are NOT generated here even
// though the generated code calls them; each concrete type must provide them in its own
// `impl` block.
macro_rules! utfstring_common_impl {
{
// Attributes (typically docs) for the generated struct, its name, and its code-unit type.
$(#[$utfstring_meta:meta])*
struct $utfstring:ident([$uchar:ty]);
// Related types: the borrowed UTF slice type, the unvalidated wide string types (these
// are referenced as `crate::$name` below), and the validation error type.
type UtfStr = $utfstr:ident;
type UStr = $ustr:ident;
type UCStr = $ucstr:ident;
type UString = $ustring:ident;
type UCString = $ucstring:ident;
type UtfError = $utferror:ident;
// Each `fn name() -> {}` entry only carries attributes that are forwarded onto the
// generated method of the same name.
$(#[$from_vec_unchecked_meta:meta])*
fn from_vec_unchecked() -> {}
$(#[$from_str_meta:meta])*
fn from_str() -> {}
$(#[$push_utfstr_meta:meta])*
fn push_utfstr() -> {}
$(#[$as_mut_vec_meta:meta])*
fn as_mut_vec() -> {}
} => {
// The string is a thin wrapper around a `Vec` of code units; the derived comparison and
// hashing impls therefore operate on the raw code units.
$(#[$utfstring_meta])*
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
pub struct $utfstring {
// Code-unit buffer. The safe API below reinterprets it as a UTF string slice without
// re-validating, so it must always hold valid data for this encoding.
inner: Vec<$uchar>,
}
impl $utfstring {
/// Creates a new, empty string backed by an empty vector.
#[inline]
#[must_use]
pub const fn new() -> Self {
Self { inner: Vec::new() }
}
/// Creates a new, empty string whose buffer is created with `Vec::with_capacity`.
///
/// `capacity` is measured in code units, not in `char`s.
#[inline]
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Self {
inner: Vec::with_capacity(capacity),
}
}
// Wraps the vector without any validation; documentation is supplied by the invocation.
$(#[$from_vec_unchecked_meta])*
#[inline]
#[must_use]
pub unsafe fn from_vec_unchecked(v: impl Into<Vec<$uchar>>) -> Self {
Self { inner: v.into() }
}
// Re-encodes a `str` one `char` at a time; documentation is supplied by the invocation.
$(#[$from_str_meta])*
#[inline]
#[allow(clippy::should_implement_trait)]
#[must_use]
pub fn from_str<S: AsRef<str> + ?Sized>(s: &S) -> Self {
let s = s.as_ref();
let mut string = Self::new();
string.extend(s.chars());
string
}
/// Borrows the whole string as a UTF string slice.
#[inline]
#[must_use]
pub fn as_utfstr(&self) -> &$utfstr {
// SAFETY: relies on `inner` holding valid UTF data; no check is performed here.
unsafe { $utfstr::from_slice_unchecked(self.inner.as_slice()) }
}
/// Mutably borrows the whole string as a UTF string slice.
#[inline]
#[must_use]
pub fn as_mut_utfstr(&mut self) -> &mut $utfstr {
// SAFETY: relies on `inner` holding valid UTF data; no check is performed here.
unsafe { $utfstr::from_slice_unchecked_mut(&mut self.inner) }
}
/// Borrows the string as the crate's unvalidated wide string slice type.
#[inline]
#[must_use]
pub fn as_ustr(&self) -> &crate::$ustr {
crate::$ustr::from_slice(self.as_slice())
}
/// Consumes the string and returns its underlying vector of code units.
#[inline]
#[must_use]
pub fn into_vec(self) -> Vec<$uchar> {
self.inner
}
// Appends the code units of another UTF string slice verbatim; documentation is supplied
// by the invocation.
$(#[$push_utfstr_meta])*
#[inline]
pub fn push_utfstr<S: AsRef<$utfstr> + ?Sized>(&mut self, string: &S) {
self.inner.extend_from_slice(string.as_ref().as_slice())
}
/// Returns the capacity of the underlying vector, in code units.
#[inline]
#[must_use]
pub fn capacity(&self) -> usize {
self.inner.capacity()
}
/// Reserves space for at least `additional` more code units (see `Vec::reserve`).
#[inline]
pub fn reserve(&mut self, additional: usize) {
self.inner.reserve(additional)
}
/// Reserves space for `additional` more code units (see `Vec::reserve_exact`).
#[inline]
pub fn reserve_exact(&mut self, additional: usize) {
self.inner.reserve_exact(additional)
}
/// Shrinks the capacity of the underlying vector (see `Vec::shrink_to_fit`).
#[inline]
pub fn shrink_to_fit(&mut self) {
self.inner.shrink_to_fit()
}
/// Shrinks the capacity of the underlying vector with a lower bound (see `Vec::shrink_to`).
#[inline]
pub fn shrink_to(&mut self, min_capacity: usize) {
self.inner.shrink_to(min_capacity)
}
/// Returns the string contents as a slice of code units.
#[inline]
#[must_use]
pub fn as_slice(&self) -> &[$uchar] {
self.inner.as_slice()
}
// Inserts `slice` at element index `idx`, shifting the existing tail to the right.
//
// SAFETY contract for callers: nothing is validated here. `idx` must not exceed the
// current length (otherwise `len - idx` underflows and the pointer offsets leave the
// allocation), and the resulting buffer must still be valid UTF data.
unsafe fn insert_slice(&mut self, idx: usize, slice: &[$uchar]) {
let len = self.inner.len();
let amt = slice.len();
// Grow first so that both copies below stay inside the allocation.
self.inner.reserve(amt);
// Move the tail `[idx, len)` up by `amt`; the ranges may overlap, hence `ptr::copy`.
ptr::copy(
self.inner.as_ptr().add(idx),
self.inner.as_mut_ptr().add(idx + amt),
len - idx,
);
// Fill the gap that was just opened with the new code units.
ptr::copy_nonoverlapping(slice.as_ptr(), self.inner.as_mut_ptr().add(idx), amt);
self.inner.set_len(len + amt);
}
// Hands out the raw buffer; unsafe because callers could store invalid data in it.
// Documentation is supplied by the invocation.
$(#[$as_mut_vec_meta])*
#[inline]
#[must_use]
pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<$uchar> {
&mut self.inner
}
/// Returns the length of the string in code units (the length of the underlying vector).
#[inline]
#[must_use]
pub fn len(&self) -> usize {
self.inner.len()
}
/// Returns `true` if the string contains no code units.
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
/// Removes all contents by clearing the underlying vector.
#[inline]
pub fn clear(&mut self) {
self.inner.clear()
}
/// Converts the string into a boxed UTF string slice.
#[inline]
#[must_use]
pub fn into_boxed_utfstr(self) -> Box<$utfstr> {
let slice = self.inner.into_boxed_slice();
// SAFETY: the boxed slice holds exactly the code units of this string.
unsafe { $utfstr::from_boxed_slice_unchecked(slice) }
}
/// Appends a `str`, re-encoding it one `char` at a time.
#[inline]
pub fn push_str<S: AsRef<str> + ?Sized>(&mut self, string: &S) {
self.extend(string.as_ref().chars())
}
}
// `+` and `+=` append either a UTF string slice of the same encoding or a `str`.
impl Add<&$utfstr> for $utfstring {
type Output = $utfstring;
#[inline]
fn add(mut self, rhs: &$utfstr) -> Self::Output {
self.push_utfstr(rhs);
self
}
}
impl Add<&str> for $utfstring {
type Output = $utfstring;
#[inline]
fn add(mut self, rhs: &str) -> Self::Output {
self.push_str(rhs);
self
}
}
impl AddAssign<&$utfstr> for $utfstring {
#[inline]
fn add_assign(&mut self, rhs: &$utfstr) {
self.push_utfstr(rhs)
}
}
impl AddAssign<&str> for $utfstring {
#[inline]
fn add_assign(&mut self, rhs: &str) {
self.push_str(rhs)
}
}
// Cheap reference conversions; all of them borrow the existing buffer.
impl AsMut<$utfstr> for $utfstring {
#[inline]
fn as_mut(&mut self) -> &mut $utfstr {
self.as_mut_utfstr()
}
}
impl AsRef<$utfstr> for $utfstring {
#[inline]
fn as_ref(&self) -> &$utfstr {
self.as_utfstr()
}
}
impl AsRef<[$uchar]> for $utfstring {
#[inline]
fn as_ref(&self) -> &[$uchar] {
&self.inner
}
}
impl AsRef<crate::$ustr> for $utfstring {
#[inline]
fn as_ref(&self) -> &crate::$ustr {
self.as_ustr()
}
}
impl Borrow<$utfstr> for $utfstring {
#[inline]
fn borrow(&self) -> &$utfstr {
self.as_utfstr()
}
}
impl BorrowMut<$utfstr> for $utfstring {
#[inline]
fn borrow_mut(&mut self) -> &mut $utfstr {
self.as_mut_utfstr()
}
}
// Formatting is delegated to the borrowed slice type.
impl core::fmt::Debug for $utfstring {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
core::fmt::Debug::fmt(self.as_utfstr(), f)
}
}
// Dereferencing gives access to every method of the borrowed slice type.
impl Deref for $utfstring {
type Target = $utfstr;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_utfstr()
}
}
impl DerefMut for $utfstring {
#[inline]
fn deref_mut(&mut self) -> &mut Self::Target {
self.as_mut_utfstr()
}
}
impl core::fmt::Display for $utfstring {
#[inline]
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
core::fmt::Display::fmt(self.as_utfstr(), f)
}
}
// `Extend` impls: `char`s go through the encoding-specific `push`, string-like items go
// through `push_utfstr` (same encoding) or `push_str` (re-encoded from UTF-8).
impl Extend<char> for $utfstring {
#[inline]
fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
let iter = iter.into_iter();
// Reserve one code unit per expected `char`; `push` may still grow the buffer.
let (lower_bound, _) = iter.size_hint();
self.reserve(lower_bound);
iter.for_each(|c| self.push(c));
}
}
impl<'a> Extend<&'a char> for $utfstring {
#[inline]
fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
self.extend(iter.into_iter().copied())
}
}
impl<'a> Extend<&'a $utfstr> for $utfstring {
#[inline]
fn extend<T: IntoIterator<Item = &'a $utfstr>>(&mut self, iter: T) {
iter.into_iter().for_each(|s| self.push_utfstr(s))
}
}
impl Extend<$utfstring> for $utfstring {
#[inline]
fn extend<T: IntoIterator<Item = $utfstring>>(&mut self, iter: T) {
iter.into_iter()
.for_each(|s| self.push_utfstr(&s))
}
}
impl<'a> Extend<Cow<'a, $utfstr>> for $utfstring {
#[inline]
fn extend<T: IntoIterator<Item = Cow<'a, $utfstr>>>(&mut self, iter: T) {
iter.into_iter().for_each(|s| self.push_utfstr(&s))
}
}
impl Extend<Box<$utfstr>> for $utfstring {
#[inline]
fn extend<T: IntoIterator<Item = Box<$utfstr>>>(&mut self, iter: T) {
iter.into_iter().for_each(|s| self.push_utfstr(&s))
}
}
impl<'a> Extend<&'a str> for $utfstring {
#[inline]
fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
iter.into_iter().for_each(|s| self.push_str(s))
}
}
impl Extend<String> for $utfstring {
#[inline]
fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
iter.into_iter().for_each(|s| self.push_str(&s))
}
}
// Infallible conversions between the owned string, its slice type, `Cow`, and UTF-8 strings.
impl From<&mut $utfstr> for $utfstring {
#[inline]
fn from(value: &mut $utfstr) -> Self {
value.to_owned()
}
}
impl From<&$utfstr> for $utfstring {
#[inline]
fn from(value: &$utfstr) -> Self {
value.to_owned()
}
}
impl From<&$utfstring> for $utfstring {
#[inline]
fn from(value: &$utfstring) -> Self {
value.clone()
}
}
impl From<$utfstring> for Cow<'_, $utfstr> {
#[inline]
fn from(value: $utfstring) -> Self {
Cow::Owned(value)
}
}
impl<'a> From<&'a $utfstring> for Cow<'a, $utfstr> {
#[inline]
fn from(value: &'a $utfstring) -> Self {
Cow::Borrowed(value)
}
}
impl From<Cow<'_, $utfstr>> for $utfstring {
#[inline]
fn from(value: Cow<'_, $utfstr>) -> Self {
value.into_owned()
}
}
impl From<&str> for $utfstring {
#[inline]
fn from(value: &str) -> Self {
Self::from_str(value)
}
}
impl From<String> for $utfstring {
#[inline]
fn from(value: String) -> Self {
Self::from_str(&value)
}
}
// Dropping the UTF guarantee is free: the buffer is moved into the unvalidated type.
impl From<$utfstring> for crate::$ustring {
#[inline]
fn from(value: $utfstring) -> Self {
crate::$ustring::from_vec(value.into_vec())
}
}
// Conversions to `String` go through the `Display` implementation (`to_string`).
impl From<&$utfstr> for String {
#[inline]
fn from(value: &$utfstr) -> Self {
value.to_string()
}
}
impl From<$utfstring> for String {
#[inline]
fn from(value: $utfstring) -> Self {
value.to_string()
}
}
#[cfg(feature = "std")]
impl From<$utfstring> for std::ffi::OsString {
#[inline]
fn from(value: $utfstring) -> std::ffi::OsString {
value.as_ustr().to_os_string()
}
}
// `FromIterator` impls start from an empty string and reuse the matching `Extend` impl.
impl FromIterator<char> for $utfstring {
#[inline]
fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
let mut s = Self::new();
s.extend(iter);
s
}
}
impl<'a> FromIterator<&'a char> for $utfstring {
#[inline]
fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
let mut s = Self::new();
s.extend(iter);
s
}
}
impl<'a> FromIterator<&'a $utfstr> for $utfstring {
#[inline]
fn from_iter<T: IntoIterator<Item = &'a $utfstr>>(iter: T) -> Self {
let mut s = Self::new();
s.extend(iter);
s
}
}
impl FromIterator<$utfstring> for $utfstring {
fn from_iter<T: IntoIterator<Item = $utfstring>>(iter: T) -> Self {
let mut iterator = iter.into_iter();
// Reuse the first string's allocation as the accumulator instead of starting empty.
match iterator.next() {
None => Self::new(),
Some(mut buf) => {
buf.extend(iterator);
buf
}
}
}
}
impl FromIterator<Box<$utfstr>> for $utfstring {
#[inline]
fn from_iter<T: IntoIterator<Item = Box<$utfstr>>>(iter: T) -> Self {
let mut s = Self::new();
s.extend(iter);
s
}
}
impl<'a> FromIterator<Cow<'a, $utfstr>> for $utfstring {
#[inline]
fn from_iter<T: IntoIterator<Item = Cow<'a, $utfstr>>>(iter: T) -> Self {
let mut s = Self::new();
s.extend(iter);
s
}
}
impl<'a> FromIterator<&'a str> for $utfstring {
#[inline]
fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
let mut s = Self::new();
s.extend(iter);
s
}
}
impl FromIterator<String> for $utfstring {
#[inline]
fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
let mut s = Self::new();
s.extend(iter);
s
}
}
// Parsing from `str` cannot fail; it simply forwards to the inherent `from_str`.
impl FromStr for $utfstring {
type Err = Infallible;
#[inline]
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok($utfstring::from_str(s))
}
}
// Range indexing is forwarded to the slice type's `Index`/`IndexMut` implementations.
impl<I> Index<I> for $utfstring
where
I: RangeBounds<usize> + SliceIndex<[$uchar], Output = [$uchar]>,
{
type Output = $utfstr;
#[inline]
fn index(&self, index: I) -> &Self::Output {
&self.deref()[index]
}
}
impl<I> IndexMut<I> for $utfstring
where
I: RangeBounds<usize> + SliceIndex<[$uchar], Output = [$uchar]>,
{
#[inline]
fn index_mut(&mut self, index: I) -> &mut Self::Output {
&mut self.deref_mut()[index]
}
}
// Equality against same-encoding types compares the raw code units.
impl PartialEq<$utfstr> for $utfstring {
#[inline]
fn eq(&self, other: &$utfstr) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<&$utfstr> for $utfstring {
#[inline]
fn eq(&self, other: &&$utfstr) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<Cow<'_, $utfstr>> for $utfstring {
#[inline]
fn eq(&self, other: &Cow<'_, $utfstr>) -> bool {
self == other.as_ref()
}
}
impl PartialEq<$utfstring> for Cow<'_, $utfstr> {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.as_ref() == other
}
}
impl PartialEq<$utfstring> for $utfstr {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<$utfstring> for &$utfstr {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.as_slice() == other.as_slice()
}
}
// Equality against UTF-8 strings compares decoded `char` sequences, since the code units
// of the two encodings are not directly comparable.
impl PartialEq<str> for $utfstring {
#[inline]
fn eq(&self, other: &str) -> bool {
self.chars().eq(other.chars())
}
}
impl PartialEq<&str> for $utfstring {
#[inline]
fn eq(&self, other: &&str) -> bool {
self.chars().eq(other.chars())
}
}
impl PartialEq<$utfstring> for str {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.chars().eq(other.chars())
}
}
impl PartialEq<$utfstring> for &str {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.chars().eq(other.chars())
}
}
impl PartialEq<String> for $utfstring {
#[inline]
fn eq(&self, other: &String) -> bool {
self.chars().eq(other.chars())
}
}
impl PartialEq<$utfstring> for String {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.chars().eq(other.chars())
}
}
impl PartialEq<String> for $utfstr {
#[inline]
fn eq(&self, other: &String) -> bool {
self.chars().eq(other.chars())
}
}
impl PartialEq<$utfstr> for String {
#[inline]
fn eq(&self, other: &$utfstr) -> bool {
self.chars().eq(other.chars())
}
}
impl PartialEq<Cow<'_, str>> for $utfstring {
#[inline]
fn eq(&self, other: &Cow<'_, str>) -> bool {
self == other.as_ref()
}
}
impl PartialEq<$utfstring> for Cow<'_, str> {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.as_ref() == other
}
}
// Equality against the crate's unvalidated wide string types compares raw code units.
impl PartialEq<crate::$ustr> for $utfstring {
#[inline]
fn eq(&self, other: &crate::$ustr) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<$utfstring> for crate::$ustr {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<crate::$ustring> for $utfstring {
#[inline]
fn eq(&self, other: &crate::$ustring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<$utfstring> for crate::$ustring {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<crate::$ustring> for $utfstr {
#[inline]
fn eq(&self, other: &crate::$ustring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<$utfstr> for crate::$ustring {
#[inline]
fn eq(&self, other: &$utfstr) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<crate::$ucstr> for $utfstring {
#[inline]
fn eq(&self, other: &crate::$ucstr) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<$utfstring> for crate::$ucstr {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<crate::$ucstring> for $utfstring {
#[inline]
fn eq(&self, other: &crate::$ucstring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<$utfstring> for crate::$ucstring {
#[inline]
fn eq(&self, other: &$utfstring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<crate::$ucstring> for $utfstr {
#[inline]
fn eq(&self, other: &crate::$ucstring) -> bool {
self.as_slice() == other.as_slice()
}
}
impl PartialEq<$utfstr> for crate::$ucstring {
#[inline]
fn eq(&self, other: &$utfstr) -> bool {
self.as_slice() == other.as_slice()
}
}
// Owning a slice copies its code units into a new vector.
impl ToOwned for $utfstr {
type Owned = $utfstring;
#[inline]
fn to_owned(&self) -> Self::Owned {
// SAFETY: the code units come from an existing UTF string slice of the same encoding.
unsafe { $utfstring::from_vec_unchecked(&self.inner) }
}
}
// Fallible conversions from unvalidated wide strings; all of them forward to the
// encoding-specific `from_ustring`, which must be defined on the concrete type.
impl TryFrom<crate::$ustring> for $utfstring {
type Error = $utferror;
#[inline]
fn try_from(value: crate::$ustring) -> Result<Self, Self::Error> {
$utfstring::from_ustring(value)
}
}
impl TryFrom<crate::$ucstring> for $utfstring {
type Error = $utferror;
#[inline]
fn try_from(value: crate::$ucstring) -> Result<Self, Self::Error> {
$utfstring::from_ustring(value)
}
}
impl TryFrom<&crate::$ustr> for $utfstring {
type Error = $utferror;
#[inline]
fn try_from(value: &crate::$ustr) -> Result<Self, Self::Error> {
$utfstring::from_ustring(value)
}
}
impl TryFrom<&crate::$ucstr> for $utfstring {
type Error = $utferror;
#[inline]
fn try_from(value: &crate::$ucstr) -> Result<Self, Self::Error> {
$utfstring::from_ustring(value)
}
}
// Allows `write!`/`writeln!` to append to the string; neither method ever returns an error.
impl Write for $utfstring {
#[inline]
fn write_str(&mut self, s: &str) -> core::fmt::Result {
self.push_str(s);
Ok(())
}
#[inline]
fn write_char(&mut self, c: char) -> core::fmt::Result {
self.push(c);
Ok(())
}
}
};
}
// Instantiates the shared implementation for UTF-16. Doc comments inside the invocation
// are captured by the macro's attribute matchers and attached to the generated items.
utfstring_common_impl! {
/// An owned, growable string stored as a vector of `u16` code units.
///
/// The contents are treated as valid UTF-16 by the safe API without being re-checked.
struct Utf16String([u16]);
type UtfStr = Utf16Str;
type UStr = U16Str;
type UCStr = U16CStr;
type UString = U16String;
type UCString = U16CString;
type UtfError = Utf16Error;
/// Wraps a vector of `u16` code units without checking that it is valid UTF-16.
///
/// # Safety
///
/// No validation is performed. The rest of the API reinterprets the buffer as UTF-16
/// without further checks, so the caller must ensure `v` is valid UTF-16.
fn from_vec_unchecked() -> {}
/// Re-encodes a `str` as UTF-16, one `char` at a time.
fn from_str() -> {}
/// Appends the code units of the given UTF-16 string slice to the end of this string.
fn push_utfstr() -> {}
/// Returns a mutable reference to the underlying vector of `u16` code units.
///
/// # Safety
///
/// The rest of the API reinterprets the buffer as UTF-16 without further checks, so the
/// caller must leave the vector holding valid UTF-16.
fn as_mut_vec() -> {}
}
// Instantiates the shared implementation for UTF-32. Doc comments inside the invocation
// are captured by the macro's attribute matchers and attached to the generated items.
utfstring_common_impl! {
/// An owned, growable string stored as a vector of `u32` values.
///
/// The contents are treated as valid UTF-32 by the safe API without being re-checked.
struct Utf32String([u32]);
type UtfStr = Utf32Str;
type UStr = U32Str;
type UCStr = U32CStr;
type UString = U32String;
type UCString = U32CString;
type UtfError = Utf32Error;
/// Wraps a vector of `u32` values without checking that it is valid UTF-32.
///
/// # Safety
///
/// No validation is performed. The rest of the API converts elements to `char` without
/// further checks, so the caller must ensure every element of `v` is a valid `char` value.
fn from_vec_unchecked() -> {}
/// Re-encodes a `str` as UTF-32, one `char` at a time.
fn from_str() -> {}
/// Appends the elements of the given UTF-32 string slice to the end of this string.
fn push_utfstr() -> {}
/// Returns a mutable reference to the underlying vector of `u32` values.
///
/// # Safety
///
/// The rest of the API converts elements to `char` without further checks, so the caller
/// must leave every element of the vector a valid `char` value.
fn as_mut_vec() -> {}
}
impl Utf16String {
    /// Builds a string from `u16` code units after validating them as UTF-16.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `validate_utf16_vec` when the data is rejected.
    pub fn from_vec(v: impl Into<Vec<u16>>) -> Result<Self, Utf16Error> {
        let units = validate_utf16_vec(v.into())?;
        // SAFETY: `units` has just passed UTF-16 validation.
        Ok(unsafe { Self::from_vec_unchecked(units) })
    }

    /// Interprets a `u16` slice as UTF-16, substituting U+FFFD for every unpaired surrogate.
    ///
    /// Valid input is borrowed unchanged; otherwise a repaired copy is allocated.
    #[must_use]
    pub fn from_slice_lossy(s: &[u16]) -> Cow<'_, Utf16Str> {
        let first_bad = match validate_utf16(s) {
            // SAFETY: validation succeeded for the whole slice.
            Ok(()) => return Cow::Borrowed(unsafe { Utf16Str::from_slice_unchecked(s) }),
            Err(e) => e.index(),
        };

        let mut repl_buf = [0; 2];
        let repl: &[u16] = char::REPLACEMENT_CHARACTER.encode_utf16(&mut repl_buf);

        // Everything before the first error is known-good and copied verbatim.
        let mut out = Vec::with_capacity(s.len());
        out.extend_from_slice(&s[..first_bad]);

        let mut i = first_bad;
        while i < s.len() {
            let unit = s[i];
            i += 1;
            if !is_utf16_surrogate(unit) {
                out.push(unit);
                continue;
            }
            // Only a high surrogate immediately followed by a low surrogate survives.
            match s.get(i) {
                Some(&low) if !is_utf16_low_surrogate(unit) && is_utf16_low_surrogate(low) => {
                    out.push(unit);
                    out.push(low);
                    i += 1;
                }
                _ => out.extend_from_slice(repl),
            }
        }
        // SAFETY: every unpaired surrogate was replaced above.
        Cow::Owned(unsafe { Self::from_vec_unchecked(out) })
    }

    /// Converts a wide string into a UTF-16 string without validating it.
    ///
    /// # Safety
    ///
    /// Forwards to `from_vec_unchecked`; the caller must uphold that function's contract.
    #[inline]
    #[must_use]
    pub unsafe fn from_ustring_unchecked(s: impl Into<crate::U16String>) -> Self {
        let units = s.into().into_vec();
        Self::from_vec_unchecked(units)
    }

    /// Converts a wide string into a UTF-16 string, validating it via [`Self::from_vec`].
    ///
    /// # Errors
    ///
    /// Returns the error produced by [`Self::from_vec`].
    #[inline]
    pub fn from_ustring(s: impl Into<crate::U16String>) -> Result<Self, Utf16Error> {
        let units = s.into().into_vec();
        Self::from_vec(units)
    }

    /// Lossy conversion of a wide string slice; see [`Self::from_slice_lossy`].
    #[inline]
    #[must_use]
    pub fn from_ustr_lossy(s: &crate::U16Str) -> Cow<'_, Utf16Str> {
        let units = s.as_slice();
        Self::from_slice_lossy(units)
    }

    /// Converts a wide C string into a UTF-16 string without validating it.
    ///
    /// # Safety
    ///
    /// Forwards to `from_vec_unchecked`; the caller must uphold that function's contract.
    #[inline]
    #[must_use]
    pub unsafe fn from_ucstring_unchecked(s: impl Into<crate::U16CString>) -> Self {
        let units = s.into().into_vec();
        Self::from_vec_unchecked(units)
    }

    /// Converts a wide C string into a UTF-16 string, validating it via [`Self::from_vec`].
    ///
    /// # Errors
    ///
    /// Returns the error produced by [`Self::from_vec`].
    #[inline]
    pub fn from_ucstring(s: impl Into<crate::U16CString>) -> Result<Self, Utf16Error> {
        let units = s.into().into_vec();
        Self::from_vec(units)
    }

    /// Lossy conversion of a wide C string slice; see [`Self::from_slice_lossy`].
    #[inline]
    #[must_use]
    pub fn from_ucstr_lossy(s: &crate::U16CStr) -> Cow<'_, Utf16Str> {
        let units = s.as_slice();
        Self::from_slice_lossy(units)
    }

    /// Appends one `char`, encoded as one or two `u16` code units.
    #[inline]
    pub fn push(&mut self, ch: char) {
        let mut units = [0; 2];
        let encoded: &[u16] = ch.encode_utf16(&mut units);
        self.inner.extend_from_slice(encoded)
    }

    /// Shortens the string to `new_len` code units; does nothing if it is already shorter.
    ///
    /// # Panics
    ///
    /// Panics if `new_len` is within the string but not on a character boundary.
    #[inline]
    pub fn truncate(&mut self, new_len: usize) {
        if new_len > self.len() {
            return;
        }
        assert!(self.is_char_boundary(new_len));
        self.inner.truncate(new_len)
    }

    /// Removes and returns the last `char`, or `None` when the string is empty.
    pub fn pop(&mut self) -> Option<char> {
        let last = self.inner.pop()?;
        if !is_utf16_low_surrogate(last) {
            // SAFETY: in valid UTF-16 a trailing unit that is not a low surrogate is a
            // complete scalar value on its own.
            return Some(unsafe { char::from_u32_unchecked(last as u32) });
        }
        // A trailing low surrogate is always preceded by its high surrogate.
        let high = self.inner.pop().unwrap();
        // SAFETY: `high` and `last` form a surrogate pair in valid UTF-16.
        Some(unsafe { decode_utf16_surrogate_pair(high, last) })
    }

    /// Removes the `char` that starts at code-unit index `idx` and returns it.
    ///
    /// # Panics
    ///
    /// Panics if no character starts at `idx` (the `unwrap` on the first char fails). The
    /// slice indexing `self[idx..]` may panic as well; its exact conditions are defined by
    /// `Utf16Str`.
    #[inline]
    pub fn remove(&mut self, idx: usize) -> char {
        let removed = self[idx..].chars().next().unwrap();
        let tail_start = idx + removed.len_utf16();
        let old_len = self.len();
        // SAFETY: `idx..tail_start` is one decoded character inside the buffer, so the tail
        // `[tail_start, old_len)` is in bounds and is shifted down over it.
        unsafe {
            let base = self.inner.as_mut_ptr();
            ptr::copy(base.add(tail_start), base.add(idx), old_len - tail_start);
            self.inner.set_len(old_len - (tail_start - idx));
        }
        removed
    }

    /// Keeps only the characters for which `f` returns `true`, preserving their order.
    pub fn retain<F>(&mut self, mut f: F)
    where
        F: FnMut(char) -> bool,
    {
        let mut pos = 0;
        while pos < self.len() {
            // SAFETY: `pos` only advances by whole characters, so it is always a boundary
            // inside the string.
            let tail = unsafe { self.get_unchecked(pos..) };
            let ch = tail.chars().next().unwrap();
            let width = ch.len_utf16();
            if f(ch) {
                pos += width;
            } else {
                self.inner.drain(pos..pos + width);
            }
        }
    }

    /// Inserts `ch` at code-unit index `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not a character boundary.
    #[inline]
    pub fn insert(&mut self, idx: usize, ch: char) {
        assert!(self.is_char_boundary(idx));
        let mut units = [0; 2];
        let encoded: &[u16] = ch.encode_utf16(&mut units);
        // SAFETY: `idx` was checked above and `encoded` is a complete encoded character.
        unsafe {
            self.insert_slice(idx, encoded);
        }
    }

    /// Inserts a UTF-16 string slice at code-unit index `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not a character boundary.
    #[inline]
    pub fn insert_utfstr(&mut self, idx: usize, string: &Utf16Str) {
        assert!(self.is_char_boundary(idx));
        let units = string.as_slice();
        // SAFETY: `idx` was checked above and `units` comes from a `Utf16Str`.
        unsafe {
            self.insert_slice(idx, units);
        }
    }

    /// Splits the string at code-unit index `at`, returning the tail as a new string.
    ///
    /// # Panics
    ///
    /// Panics if `at` is not a character boundary.
    #[inline]
    #[must_use]
    pub fn split_off(&mut self, at: usize) -> Self {
        assert!(self.is_char_boundary(at));
        let tail = self.inner.split_off(at);
        // SAFETY: the split happened on a character boundary.
        unsafe { Self::from_vec_unchecked(tail) }
    }

    /// Creates a draining iterator over the characters in `range`.
    ///
    /// The range is resolved by `crate::range` against the current length.
    ///
    /// # Panics
    ///
    /// Panics if either end of the resolved range is not a character boundary.
    pub fn drain<R>(&mut self, range: R) -> DrainUtf16<'_>
    where
        R: RangeBounds<usize>,
    {
        let resolved = crate::range(range, ..self.len());
        let (start, end) = (resolved.start, resolved.end);
        assert!(self.is_char_boundary(start));
        assert!(self.is_char_boundary(end));

        // The raw pointer is taken first so the iterator below can borrow `self` as well.
        let self_ptr: *mut _ = self;
        // SAFETY: both ends were verified to be character boundaries.
        let chars_iter = unsafe { self.get_unchecked(start..end) }.chars();
        DrainUtf16 {
            start,
            end,
            iter: chars_iter,
            string: self_ptr,
        }
    }

    /// Replaces the code units in `range` with the contents of `replace_with`.
    ///
    /// # Panics
    ///
    /// Panics if a bounded end of `range` does not fall on a character boundary.
    pub fn replace_range<R>(&mut self, range: R, replace_with: &Utf16Str)
    where
        R: RangeBounds<usize>,
    {
        use core::ops::Bound::{Excluded, Included, Unbounded};

        // Each bound is translated to the code-unit index where the edit begins or ends.
        let start = range.start_bound();
        match start {
            Included(&n) => assert!(self.is_char_boundary(n)),
            Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
            Unbounded => {}
        };
        let end = range.end_bound();
        match end {
            Included(&n) => assert!(self.is_char_boundary(n + 1)),
            Excluded(&n) => assert!(self.is_char_boundary(n)),
            Unbounded => {}
        };

        let replacement = replace_with.as_slice().iter().copied();
        // Dropping the `Splice` right away is what performs the replacement.
        drop(self.inner.splice((start, end), replacement));
    }
}
impl Utf32String {
pub fn from_vec(v: impl Into<Vec<u32>>) -> Result<Self, Utf32Error> {
let v = validate_utf32_vec(v.into())?;
Ok(unsafe { Self::from_vec_unchecked(v) })
}
#[must_use]
pub fn from_slice_lossy(s: &[u32]) -> Cow<'_, Utf32Str> {
match validate_utf32(s) {
Ok(()) => Cow::Borrowed(unsafe { Utf32Str::from_slice_unchecked(s) }),
Err(e) => {
let mut v = Vec::with_capacity(s.len());
v.extend_from_slice(&s[..e.index()]);
for u in s[e.index()..].iter().copied() {
if char::from_u32(u).is_some() {
v.push(u);
} else {
v.push(char::REPLACEMENT_CHARACTER as u32);
}
}
Cow::Owned(unsafe { Self::from_vec_unchecked(v) })
}
}
}
#[inline]
#[must_use]
pub unsafe fn from_ustring_unchecked(s: impl Into<crate::U32String>) -> Self {
Self::from_vec_unchecked(s.into().into_vec())
}
#[inline]
pub fn from_ustring(s: impl Into<crate::U32String>) -> Result<Self, Utf32Error> {
Self::from_vec(s.into().into_vec())
}
#[inline]
#[must_use]
pub fn from_ustr_lossy(s: &crate::U32Str) -> Cow<'_, Utf32Str> {
Self::from_slice_lossy(s.as_slice())
}
#[inline]
#[must_use]
pub unsafe fn from_ucstring_unchecked(s: impl Into<crate::U32CString>) -> Self {
Self::from_vec_unchecked(s.into().into_vec())
}
#[inline]
pub fn from_ucstring(s: impl Into<crate::U32CString>) -> Result<Self, Utf32Error> {
Self::from_vec(s.into().into_vec())
}
#[inline]
#[must_use]
pub fn from_ucstr_lossy(s: &crate::U32CStr) -> Cow<'_, Utf32Str> {
Self::from_slice_lossy(s.as_slice())
}
#[inline]
#[must_use]
pub fn from_chars(s: impl Into<Vec<char>>) -> Self {
#[allow(clippy::unsound_collection_transmute)]
unsafe {
let vec: Vec<u32> = mem::transmute(s.into());
Self::from_vec_unchecked(vec)
}
}
#[inline]
pub fn push(&mut self, ch: char) {
self.inner.push(ch.into())
}
#[inline]
pub fn truncate(&mut self, new_len: usize) {
self.inner.truncate(new_len)
}
#[inline]
pub fn pop(&mut self) -> Option<char> {
self.inner
.pop()
.map(|c| unsafe { core::char::from_u32_unchecked(c) })
}
#[inline]
pub fn remove(&mut self, idx: usize) -> char {
let next = idx + 1;
let len = self.len();
unsafe {
let c = core::char::from_u32_unchecked(self.inner[idx]);
ptr::copy(
self.inner.as_ptr().add(next),
self.inner.as_mut_ptr().add(idx),
len - next,
);
self.inner.set_len(len - (next - idx));
c
}
}
pub fn retain<F>(&mut self, mut f: F)
where
F: FnMut(char) -> bool,
{
let mut index = 0;
while index < self.len() {
let c = unsafe { self.get_unchecked(index..) }
.chars()
.next()
.unwrap();
if !f(c) {
self.inner.remove(index);
} else {
index += 1;
}
}
}
#[inline]
pub fn insert(&mut self, idx: usize, ch: char) {
unsafe {
self.insert_slice(idx, &[ch as u32]);
}
}
#[inline]
pub fn insert_utfstr(&mut self, idx: usize, string: &Utf32Str) {
unsafe {
self.insert_slice(idx, string.as_slice());
}
}
#[inline]
#[must_use]
pub fn split_off(&mut self, at: usize) -> Self {
unsafe { Self::from_vec_unchecked(self.inner.split_off(at)) }
}
pub fn drain<R>(&mut self, range: R) -> DrainUtf32<'_>
where
R: RangeBounds<usize>,
{
let core::ops::Range { start, end } = crate::range(range, ..self.len());
let self_ptr: *mut _ = self;
let chars_iter = unsafe { self.get_unchecked(start..end) }.chars();
DrainUtf32 {
start,
end,
iter: chars_iter,
string: self_ptr,
}
}
#[inline]
pub fn replace_range<R>(&mut self, range: R, replace_with: &Utf32Str)
where
R: RangeBounds<usize>,
{
self.inner
.splice(range, replace_with.as_slice().iter().copied());
}
#[allow(trivial_casts)]
#[inline]
#[must_use]
pub fn into_char_vec(self) -> Vec<char> {
let mut v = mem::ManuallyDrop::new(self.into_vec());
let (ptr, len, cap) = (v.as_mut_ptr(), v.len(), v.capacity());
unsafe { Vec::from_raw_parts(ptr as *mut char, len, cap) }
}
}
// Exposes the string's elements as a mutable slice of `char`s.
impl AsMut<[char]> for Utf32String {
    #[inline]
    fn as_mut(&mut self) -> &mut [char] {
        let chars: &mut [char] = self.as_char_slice_mut();
        chars
    }
}
// Exposes the string's elements as a shared slice of `char`s.
impl AsRef<[char]> for Utf32String {
    #[inline]
    fn as_ref(&self) -> &[char] {
        let chars: &[char] = self.as_char_slice();
        chars
    }
}
impl From<Vec<char>> for Utf32String {
#[inline]
fn from(value: Vec<char>) -> Self {
Utf32String::from_chars(value)
}
}
impl From<&[char]> for Utf32String {
#[inline]
fn from(value: &[char]) -> Self {
Utf32String::from_chars(value)
}
}
impl From<Utf32String> for Vec<char> {
#[inline]
fn from(value: Utf32String) -> Self {
value.into_char_vec()
}
}
// Compares the string element-wise against a slice of `char`s.
impl PartialEq<[char]> for Utf32String {
    #[inline]
    fn eq(&self, other: &[char]) -> bool {
        let mine: &[char] = self.as_char_slice();
        mine == other
    }
}
// Strings of different encodings are equal when they decode to the same `char` sequence.
impl PartialEq<Utf16String> for Utf32String {
    #[inline]
    fn eq(&self, other: &Utf16String) -> bool {
        Iterator::eq(self.chars(), other.chars())
    }
}
// Strings of different encodings are equal when they decode to the same `char` sequence.
impl PartialEq<Utf32String> for Utf16String {
    #[inline]
    fn eq(&self, other: &Utf32String) -> bool {
        Iterator::eq(self.chars(), other.chars())
    }
}
// Compares decoded `char` sequences against a borrowed UTF-16 slice.
impl PartialEq<&Utf16Str> for Utf32String {
    #[inline]
    fn eq(&self, other: &&Utf16Str) -> bool {
        Iterator::eq(self.chars(), other.chars())
    }
}
// Compares decoded `char` sequences against a borrowed UTF-32 slice.
impl PartialEq<&Utf32Str> for Utf16String {
    #[inline]
    fn eq(&self, other: &&Utf32Str) -> bool {
        Iterator::eq(self.chars(), other.chars())
    }
}
// Mirror of `Utf32String == &Utf16Str`: compares decoded `char` sequences.
impl PartialEq<Utf32String> for &Utf16Str {
    #[inline]
    fn eq(&self, other: &Utf32String) -> bool {
        Iterator::eq(self.chars(), other.chars())
    }
}
// Mirror of `Utf16String == &Utf32Str`: compares decoded `char` sequences.
impl PartialEq<Utf16String> for &Utf32Str {
    #[inline]
    fn eq(&self, other: &Utf16String) -> bool {
        Iterator::eq(self.chars(), other.chars())
    }
}
impl TryFrom<Vec<u16>> for Utf16String {
type Error = Utf16Error;
#[inline]
fn try_from(value: Vec<u16>) -> Result<Self, Self::Error> {
Utf16String::from_vec(value)
}
}
impl TryFrom<Vec<u32>> for Utf32String {
type Error = Utf32Error;
#[inline]
fn try_from(value: Vec<u32>) -> Result<Self, Self::Error> {
Utf32String::from_vec(value)
}
}
impl TryFrom<&[u16]> for Utf16String {
type Error = Utf16Error;
#[inline]
fn try_from(value: &[u16]) -> Result<Self, Self::Error> {
Utf16String::from_vec(value)
}
}
impl TryFrom<&[u32]> for Utf32String {
type Error = Utf32Error;
#[inline]
fn try_from(value: &[u32]) -> Result<Self, Self::Error> {
Utf32String::from_vec(value)
}
}
/// Platform-dependent alias for an owned UTF string: [`Utf32String`] on every target
/// except Windows.
#[cfg(not(windows))]
pub type WideUtfString = Utf32String;
/// Platform-dependent alias for an owned UTF string: [`Utf16String`] on Windows.
#[cfg(windows)]
pub type WideUtfString = Utf16String;
#[cfg(test)]
mod test {
    use crate::*;

    /// `Utf16String::truncate` ignores lengths beyond the current length and otherwise
    /// keeps exactly the leading code units.
    #[test]
    fn utf16_truncate() {
        let cs = utf16str!("trunc");
        let mut s: Utf16String = cs.into();

        // Longer than the string: must be a no-op rather than a panic.
        s.truncate(6);
        assert_eq!(s.len(), 5);

        // Equal to the current length: also a no-op.
        s.truncate(5);
        assert_eq!(s.len(), 5);

        // Shorter: the prefix is kept. Check the contents as well as the length so that a
        // truncation keeping the wrong elements would be caught.
        s.truncate(2);
        assert_eq!(s.len(), 2);
        assert_eq!(s, "tr");
    }
}