#![doc = include_str!("../README.md")]
#![cfg_attr(docsrs, feature(doc_cfg))]
#[doc(hidden)]
pub use core;
use core::borrow::{
Borrow,
BorrowMut,
};
use core::cmp::Ordering;
use core::hash::{
Hash,
Hasher,
};
use core::iter::FromIterator;
use core::ops::{
Add,
AddAssign,
Bound,
Deref,
DerefMut,
RangeBounds,
};
use core::str::{
FromStr,
Utf8Error,
};
use core::{
fmt,
slice,
};
use std::borrow::Cow;
use std::ffi::OsStr;
use std::iter::FusedIterator;
mod features;
mod macros;
mod repr;
use repr::Repr;
mod traits;
pub use traits::{
CompactStringExt,
ToCompactString,
};
#[cfg(test)]
mod tests;
/// A string type that wraps a single `Repr` value (defined in the `repr` module).
///
/// `#[repr(transparent)]` gives it the same layout as `Repr`, and a static
/// assertion at the end of this file requires its size to equal that of `String`.
#[derive(Clone)]
#[repr(transparent)]
pub struct CompactString(Repr);
impl CompactString {
/// Creates a `CompactString` from any value that can be viewed as a `&str`,
/// handing the text to `Repr::new`.
#[inline]
pub fn new<T: AsRef<str>>(text: T) -> Self {
CompactString(Repr::new(text.as_ref()))
}
/// `const` constructor that forwards `text` to `Repr::new_inline`.
///
/// NOTE(review): presumably requires `text` to fit the inline buffer and panics
/// otherwise — confirm against `Repr::new_inline`.
#[inline]
pub const fn new_inline(text: &str) -> Self {
CompactString(Repr::new_inline(text))
}
/// Creates a `CompactString` through `Repr::with_capacity(capacity)`.
///
/// NOTE(review): presumably the result is empty with room for at least
/// `capacity` bytes — confirm against `Repr::with_capacity`.
#[inline]
pub fn with_capacity(capacity: usize) -> Self {
CompactString(Repr::with_capacity(capacity))
}
/// Builds a `CompactString` from a byte buffer via `Repr::from_utf8`.
///
/// # Errors
///
/// Returns the `Utf8Error` reported by `Repr::from_utf8` (presumably when the
/// bytes are not valid UTF-8 — confirm in the `repr` module).
#[inline]
pub fn from_utf8<B: AsRef<[u8]>>(buf: B) -> Result<Self, Utf8Error> {
Repr::from_utf8(buf).map(CompactString)
}
/// Builds a `CompactString` from a byte buffer via `Repr::from_utf8_unchecked`.
///
/// # Safety
///
/// NOTE(review): the name implies no validation is performed, so the caller
/// presumably must guarantee that `buf` is valid UTF-8 — confirm against
/// `Repr::from_utf8_unchecked`.
#[inline]
#[must_use]
pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(buf: B) -> Self {
CompactString(Repr::from_utf8_unchecked(buf))
}
/// Decodes a slice of UTF-16 code units into a `CompactString`.
///
/// Capacity for `buf.len()` bytes is requested up front, then each decoded
/// character is pushed in order.
///
/// # Errors
///
/// Returns [`Utf16Error`] as soon as `decode_utf16` reports a code unit that
/// cannot be decoded.
#[inline]
pub fn from_utf16<B: AsRef<[u16]>>(buf: B) -> Result<Self, Utf16Error> {
    let units = buf.as_ref();
    let mut decoded = CompactString::with_capacity(units.len());
    for unit in core::char::decode_utf16(units.iter().copied()) {
        match unit {
            Ok(ch) => decoded.push(ch),
            Err(_) => return Err(Utf16Error(())),
        }
    }
    Ok(decoded)
}
/// Decodes a slice of UTF-16 code units into a `CompactString`, substituting
/// the replacement character for every code unit that fails to decode.
#[inline]
pub fn from_utf16_lossy<B: AsRef<[u16]>>(buf: B) -> Self {
    let units = buf.as_ref();
    let mut decoded = CompactString::with_capacity(units.len());
    std::char::decode_utf16(units.iter().copied()).for_each(|unit| {
        if let Ok(ch) = unit {
            decoded.push(ch);
        } else {
            decoded.push_str("�");
        }
    });
    decoded
}
/// Returns the length reported by the underlying `Repr` (used throughout this
/// file as a byte length).
#[inline]
pub fn len(&self) -> usize {
self.0.len()
}
/// Returns `true` when `len()` is zero.
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Returns the capacity reported by the underlying `Repr`.
#[inline]
pub fn capacity(&self) -> usize {
self.0.capacity()
}
/// Forwards to `Repr::reserve`.
///
/// NOTE(review): presumably guarantees room for at least `additional` more
/// bytes beyond the current length — confirm in the `repr` module.
#[inline]
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional)
}
/// Borrows the contents as a `&str`, as provided by `Repr::as_str`.
#[inline]
pub fn as_str(&self) -> &str {
self.0.as_str()
}
/// Borrows the contents as a `&mut str`: the first `len()` bytes of the buffer
/// returned by `Repr::as_mut_buf`.
#[inline]
pub fn as_mut_str(&mut self) -> &mut str {
let len = self.len();
// SAFETY (assumed invariant — TODO confirm in `repr`): the first `len` bytes
// of the buffer are always valid UTF-8.
unsafe { std::str::from_utf8_unchecked_mut(&mut self.0.as_mut_buf()[..len]) }
}
/// Borrows the contents as bytes: the first `len()` bytes of `Repr::as_slice`.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.0.as_slice()[..self.len()]
}
/// Returns the mutable buffer from `Repr::as_mut_buf`.
///
/// Unlike [`as_bytes`](Self::as_bytes), the slice is not cut down to `len()`
/// here, so it is whatever extent `as_mut_buf` provides.
///
/// # Safety
///
/// NOTE(review): the caller presumably must leave the first `len()` bytes as
/// valid UTF-8, since `as_str`/`as_mut_str` do not re-validate — confirm.
#[inline]
pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] {
self.0.as_mut_buf()
}
/// Appends a single character by encoding it to UTF-8 in a 4-byte scratch
/// buffer and pushing the resulting string slice.
pub fn push(&mut self, ch: char) {
    let mut scratch = [0; 4];
    let encoded = ch.encode_utf8(&mut scratch);
    self.push_str(encoded);
}
/// Forwards to `Repr::pop`.
///
/// NOTE(review): presumably removes and returns the last character, or `None`
/// when the string is empty — confirm in the `repr` module.
#[inline]
pub fn pop(&mut self) -> Option<char> {
self.0.pop()
}
/// Appends `s` by forwarding to `Repr::push_str`.
#[inline]
pub fn push_str(&mut self, s: &str) {
self.0.push_str(s)
}
/// Removes the character that starts at byte index `idx` and returns it,
/// shifting all following bytes towards the front.
///
/// # Panics
///
/// Panics if `idx` is out of bounds or not on a char boundary (the `[idx..]`
/// slice panics), or if `idx == len()` so there is no character to remove.
#[inline]
pub fn remove(&mut self, idx: usize) -> char {
let len = self.len();
let substr = &mut self.as_mut_str()[idx..];
let ch = substr
.chars()
.next()
.expect("cannot remove a char from the end of a string");
let ch_len = ch.len_utf8();
// Number of bytes that follow the removed character.
let num_bytes = substr.len() - ch_len;
let ptr = substr.as_mut_ptr();
// SAFETY: `ptr..ptr + ch_len + num_bytes` is exactly `substr`, so both the
// source and destination ranges lie inside it; `copy` tolerates the overlap.
// The new length drops one whole character, so it stays on a char boundary.
unsafe {
core::ptr::copy(ptr.add(ch_len) as *const u8, ptr, num_bytes);
self.set_len(len - ch_len);
}
ch
}
/// Sets the length by forwarding to `Repr::set_len`.
///
/// # Safety
///
/// NOTE(review): callers in this file only pass lengths that are within the
/// reserved capacity and on a char boundary of initialized, valid UTF-8 data;
/// external callers presumably must uphold the same — confirm in `repr`.
#[inline]
pub unsafe fn set_len(&mut self, new_len: usize) {
self.0.set_len(new_len)
}
/// Reports whether the underlying `Repr` says it is heap allocated.
#[inline]
pub fn is_heap_allocated(&self) -> bool {
self.0.is_heap_allocated()
}
/// Resolves `range` into concrete `(start, end)` byte offsets (end exclusive)
/// and validates them against the current contents.
///
/// # Panics
///
/// Panics with `"illegal range"` if a bound overflows `usize` when made
/// exclusive/inclusive, if `end < start`, or if either offset is not a char
/// boundary of the string (which also covers offsets past the end).
#[inline]
fn ensure_range(&self, range: impl RangeBounds<usize>) -> (usize, usize) {
    // Kept out of line so the happy path stays small.
    #[cold]
    #[inline(never)]
    fn illegal_range() -> ! {
        panic!("illegal range");
    }

    let start = match range.start_bound() {
        Bound::Unbounded => 0,
        Bound::Included(&first) => first,
        Bound::Excluded(&before) => before.checked_add(1).unwrap_or_else(|| illegal_range()),
    };
    let end = match range.end_bound() {
        Bound::Unbounded => self.len(),
        Bound::Excluded(&past) => past,
        Bound::Included(&last) => last.checked_add(1).unwrap_or_else(|| illegal_range()),
    };

    let text = self.as_str();
    let well_formed =
        start <= end && text.is_char_boundary(start) && text.is_char_boundary(end);
    if !well_formed {
        illegal_range();
    }
    (start, end)
}
#[inline]
pub fn replace_range(&mut self, range: impl RangeBounds<usize>, replace_with: &str) {
let (start, end) = self.ensure_range(range);
let dest_len = end - start;
match dest_len.cmp(&replace_with.len()) {
Ordering::Equal => unsafe { self.replace_range_same_size(start, end, replace_with) },
Ordering::Greater => unsafe { self.replace_range_shrink(start, end, replace_with) },
Ordering::Less => unsafe { self.replace_range_grow(start, end, replace_with) },
}
}
/// Overwrites `start..end` with `replace_with` when both have the same length.
///
/// # Safety
///
/// `start..end` must be a valid char-boundary range of `self`, and
/// `replace_with.len()` must equal `end - start`.
unsafe fn replace_range_same_size(&mut self, start: usize, end: usize, replace_with: &str) {
    let byte_count = end - start;
    let dest = self.as_mut_ptr().add(start);
    dest.copy_from_nonoverlapping(replace_with.as_ptr(), byte_count);
}
/// Replaces `start..end` with a `replace_with` that is shorter than the range,
/// then pulls the tail forward and shortens the string.
///
/// # Safety
///
/// `start..end` must be a valid char-boundary range of `self`, and
/// `replace_with.len()` must not exceed `end - start`.
unsafe fn replace_range_shrink(&mut self, start: usize, end: usize, replace_with: &str) {
let total_len = self.len();
let dest_len = end - start;
let new_len = total_len - (dest_len - replace_with.len());
// Number of bytes after the replaced range (the tail).
let amount = total_len - end;
let data = self.as_mut_ptr();
// Write the replacement over the start of the range first...
core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
// ...then move the tail (starting at `end`) to directly after it. The ranges
// may overlap, hence `copy` rather than `copy_nonoverlapping`.
core::ptr::copy(
data.add(total_len - amount),
data.add(new_len - amount),
amount,
);
self.set_len(new_len);
}
/// Replaces `start..end` with a `replace_with` that is longer than the range,
/// growing the string and pushing the tail back to make room.
///
/// # Safety
///
/// `start..end` must be a valid char-boundary range of `self`, and
/// `replace_with.len()` must be at least `end - start`.
unsafe fn replace_range_grow(&mut self, start: usize, end: usize, replace_with: &str) {
let dest_len = end - start;
// Ask for the extra bytes before any pointer into the buffer is taken.
self.reserve(replace_with.len() - dest_len);
let total_len = self.len();
let new_len = total_len + (replace_with.len() - dest_len);
// Number of bytes after the replaced range (the tail).
let amount = total_len - end;
// The length is raised before the data pointer is obtained and written through.
self.set_len(new_len);
let data = self.as_mut_ptr();
// Move the tail (starting at `end`) to its final position at the back; the
// ranges may overlap, hence `copy`.
core::ptr::copy(
data.add(total_len - amount),
data.add(new_len - amount),
amount,
);
// Then fill the opened gap with the replacement.
core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
}
/// Shortens the string to `new_len` bytes; does nothing when `new_len` is
/// greater than or equal to the current length.
///
/// # Panics
///
/// Panics if `new_len` is shorter than the string but not on a char boundary.
pub fn truncate(&mut self, new_len: usize) {
    let current = self.as_str();
    if new_len < current.len() {
        assert!(
            current.is_char_boundary(new_len),
            "new_len must lie on char boundary",
        );
        // SAFETY: `new_len` is below the current length and on a char boundary.
        unsafe { self.set_len(new_len) };
    }
}
/// Returns a raw const pointer to the start of the slice from `Repr::as_slice`.
#[inline]
pub fn as_ptr(&self) -> *const u8 {
self.0.as_slice().as_ptr()
}
/// Returns a raw mutable pointer to the start of the buffer from
/// `Repr::as_mut_buf`.
#[inline]
pub fn as_mut_ptr(&mut self) -> *mut u8 {
// NOTE(review): `as_mut_buf` is unsafe; only the pointer is taken here and
// nothing is written — confirm that satisfies its contract.
unsafe { self.0.as_mut_buf().as_mut_ptr() }
}
/// Inserts `string` at byte index `idx`, shifting the existing tail back.
///
/// # Panics
///
/// Panics if `idx` is not on a char boundary of the string.
pub fn insert_str(&mut self, idx: usize, string: &str) {
assert!(self.is_char_boundary(idx), "idx must lie on char boundary");
let new_len = self.len() + string.len();
self.reserve(string.len());
// SAFETY: `idx` was checked above; `reserve` is relied on to make
// `new_len` bytes writable (TODO confirm in `repr`).
unsafe {
let data = self.as_mut_ptr();
// Move the tail `idx..old_len` back by `string.len()` bytes; the count is
// `old_len - idx`. Ranges may overlap, hence `copy`.
std::ptr::copy(
data.add(idx),
data.add(idx + string.len()),
new_len - idx - string.len(),
);
// Fill the gap with the inserted bytes.
std::ptr::copy_nonoverlapping(string.as_ptr(), data.add(idx), string.len());
self.set_len(new_len);
}
}
/// Inserts a single character at byte index `idx` by encoding it to UTF-8 and
/// delegating to [`insert_str`](Self::insert_str).
///
/// # Panics
///
/// Panics if `idx` is not on a char boundary (see `insert_str`).
pub fn insert(&mut self, idx: usize, ch: char) {
    let mut scratch = [0; 4];
    let encoded = ch.encode_utf8(&mut scratch);
    self.insert_str(idx, encoded);
}
/// Empties the string by setting its length to zero; nothing else is touched
/// here (capacity handling is up to `Repr::set_len`).
pub fn clear(&mut self) {
// SAFETY: zero is always within bounds and on a char boundary.
unsafe { self.set_len(0) };
}
pub fn split_off(&mut self, at: usize) -> Self {
let result = self[at..].into();
unsafe { self.set_len(at) };
result
}
/// Returns a [`Drain`] iterator over the characters in `range`.
///
/// The bytes of the range are removed from the string when the `Drain` is
/// dropped (see `Drop for Drain`), whether or not it was fully iterated.
///
/// # Panics
///
/// Panics with `"illegal range"` when `ensure_range` rejects `range`.
pub fn drain(&mut self, range: impl RangeBounds<usize>) -> Drain<'_> {
let (start, end) = self.ensure_range(range);
Drain {
// Raw pointer kept so the drop handler can mutate the string while
// `chars` still borrows its contents.
compact_string: self as *mut Self,
start,
end,
chars: self[start..end].chars(),
}
}
/// Forwards to `Repr::shrink_to(min_capacity)`.
///
/// NOTE(review): presumably shrinks capacity while keeping at least
/// `min_capacity` and the current length — confirm in the `repr` module.
#[inline]
pub fn shrink_to(&mut self, min_capacity: usize) {
self.0.shrink_to(min_capacity);
}
/// Shrinks as far as `Repr::shrink_to` allows by passing a minimum capacity
/// of zero.
#[inline]
pub fn shrink_to_fit(&mut self) {
self.0.shrink_to(0);
}
pub fn retain(&mut self, mut predicate: impl FnMut(char) -> bool) {
let s = self.as_mut_str();
let mut dest_idx = 0;
let mut src_idx = 0;
while let Some(ch) = s[src_idx..].chars().next() {
let ch_len = ch.len_utf8();
if predicate(ch) {
unsafe {
let p = s.as_mut_ptr();
core::ptr::copy(p.add(src_idx), p.add(dest_idx), ch_len);
}
dest_idx += ch_len;
}
src_idx += ch_len;
}
unsafe { self.set_len(dest_idx) };
}
/// Converts a byte slice into a `CompactString`, replacing every invalid
/// UTF-8 sequence with U+FFFD (the replacement character).
///
/// The input is decoded one scalar value at a time by the local `next_char`
/// helper; capacity for `v.len()` bytes is requested up front.
pub fn from_utf8_lossy(v: &[u8]) -> Self {
// Pulls the next encoded character out of `iter`. Returns the validated
// bytes (borrowed from `buf`), the encoded replacement character for an
// invalid sequence, or `None` once the input is exhausted.
fn next_char<'a>(
iter: &mut <&[u8] as IntoIterator>::IntoIter,
buf: &'a mut [u8; 4],
) -> Option<&'a [u8]> {
const REPLACEMENT: &[u8] = "\u{FFFD}".as_bytes();
// Peeks at the next byte through a cloned iterator. If it matches `$range`
// it is stored in `buf[$idx]` and consumed; otherwise the byte is left in
// the input and the function returns the replacement character.
macro_rules! ensure_range {
($idx:literal, $range:pat) => {{
let mut i = iter.clone();
match i.next() {
Some(&c) if matches!(c, $range) => {
buf[$idx] = c;
*iter = i;
}
_ => return Some(REPLACEMENT),
}
}};
}
// Same check, specialised to a UTF-8 continuation byte (0x80..=0xBF).
macro_rules! ensure_cont {
($idx:literal) => {{
ensure_range!($idx, 0x80..=0xBF);
}};
}
let c = *iter.next()?;
buf[0] = c;
match c {
// Single byte (ASCII).
0x00..=0x7F => {
Some(&buf[..1])
}
// Two-byte sequence.
0xC2..=0xDF => {
ensure_cont!(1);
Some(&buf[..2])
}
// Three-byte sequence; the second byte is restricted after 0xE0
// (rejects overlong forms) and 0xED (rejects surrogates).
0xE0..=0xEF => {
match c {
0xE0 => ensure_range!(1, 0xA0..=0xBF),
0xED => ensure_range!(1, 0x80..=0x9F),
_ => ensure_cont!(1),
}
ensure_cont!(2);
Some(&buf[..3])
}
// Four-byte sequence; the second byte is restricted after 0xF0
// (rejects overlong forms) and 0xF4 (rejects values above U+10FFFF).
0xF0..=0xF4 => {
match c {
0xF0 => ensure_range!(1, 0x90..=0xBF),
0xF4 => ensure_range!(1, 0x80..=0x8F),
_ => ensure_cont!(1),
}
ensure_cont!(2);
ensure_cont!(3);
Some(&buf[..4])
}
// Stray continuation bytes and lead bytes that can never start a valid sequence.
| 0x80..=0xBF | 0xC0..=0xC1 | 0xF5..=0xF7 | 0xF8..=0xFB | 0xFC..=0xFD | 0xFE..=0xFF => Some(REPLACEMENT), }
}
let mut buf = [0; 4];
let mut result = Self::with_capacity(v.len());
let mut iter = v.iter();
while let Some(s) = next_char(&mut iter, &mut buf) {
// SAFETY: `next_char` only returns byte sequences it has validated as a
// single UTF-8 encoded character, or the encoded replacement character.
let s = unsafe { std::str::from_utf8_unchecked(s) };
result.push_str(s);
}
result
}
/// Shared implementation of the strict UTF-16 byte-slice constructors.
///
/// `from_int` converts an already-aligned `u16` from the source byte order to
/// native order; `from_bytes` builds a `u16` from two raw bytes. Which one is
/// used depends on whether `v` happens to be aligned for `u16`.
///
/// # Errors
///
/// Returns [`Utf16Error`] when `v` has an odd number of bytes or when any
/// code unit fails to decode.
fn from_utf16x(
v: &[u8],
from_int: impl Fn(u16) -> u16,
from_bytes: impl Fn([u8; 2]) -> u16,
) -> Result<Self, Utf16Error> {
if v.len() % 2 != 0 {
return Err(Utf16Error(()));
}
let mut result = CompactString::with_capacity(v.len() / 2);
// SAFETY: every bit pattern is a valid `u16`, so viewing the bytes as `u16` is sound.
match unsafe { v.align_to::<u16>() } {
// Fully aligned: read the code units directly.
(&[], v, &[]) => {
for c in std::char::decode_utf16(v.iter().copied().map(from_int)) {
result.push(c.map_err(|_| Utf16Error(()))?);
}
}
// Not aligned: view the bytes as 2-byte arrays (alignment 1) instead.
_ => {
// SAFETY: `v.len()` is even, so `v.len() / 2` byte pairs cover exactly `v`.
let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
for c in std::char::decode_utf16(v.iter().copied().map(from_bytes)) {
result.push(c.map_err(|_| Utf16Error(()))?);
}
}
}
Ok(result)
}
/// Shared implementation of the lossy UTF-16 byte-slice constructors.
///
/// `from_int` and `from_bytes` play the same roles as in `from_utf16x`. Code
/// units that fail to decode are replaced with the replacement character, and
/// a trailing odd byte yields one extra replacement character at the end.
fn from_utf16x_lossy(
v: &[u8],
from_int: impl Fn(u16) -> u16,
from_bytes: impl Fn([u8; 2]) -> u16,
) -> Self {
// Drop a dangling final byte and remember that it was there.
let (trailing_extra_byte, v) = match v.len() % 2 != 0 {
true => (true, &v[..v.len() - 1]),
false => (false, v),
};
let mut result = CompactString::with_capacity(v.len() / 2);
// SAFETY: every bit pattern is a valid `u16`, so viewing the bytes as `u16` is sound.
match unsafe { v.align_to::<u16>() } {
// Fully aligned: read the code units directly.
(&[], v, &[]) => {
for c in std::char::decode_utf16(v.iter().copied().map(from_int)) {
match c {
Ok(c) => result.push(c),
Err(_) => result.push_str("�"),
}
}
}
// Not aligned: view the bytes as 2-byte arrays (alignment 1) instead.
_ => {
// SAFETY: `v.len()` is even here, so `v.len() / 2` byte pairs cover exactly `v`.
let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
for c in std::char::decode_utf16(v.iter().copied().map(from_bytes)) {
match c {
Ok(c) => result.push(c),
Err(_) => result.push_str("�"),
}
}
}
}
if trailing_extra_byte {
result.push_str("�");
}
result
}
/// Decodes little-endian UTF-16 bytes into a `CompactString`.
///
/// # Errors
///
/// Returns [`Utf16Error`] for an odd number of bytes or an undecodable code unit.
#[inline]
pub fn from_utf16le(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
CompactString::from_utf16x(v.as_ref(), u16::from_le, u16::from_le_bytes)
}
/// Decodes big-endian UTF-16 bytes into a `CompactString`.
///
/// # Errors
///
/// Returns [`Utf16Error`] for an odd number of bytes or an undecodable code unit.
#[inline]
pub fn from_utf16be(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
CompactString::from_utf16x(v.as_ref(), u16::from_be, u16::from_be_bytes)
}
/// Decodes little-endian UTF-16 bytes into a `CompactString`, substituting the
/// replacement character for undecodable code units and a trailing odd byte.
#[inline]
pub fn from_utf16le_lossy(v: impl AsRef<[u8]>) -> Self {
CompactString::from_utf16x_lossy(v.as_ref(), u16::from_le, u16::from_le_bytes)
}
/// Decodes big-endian UTF-16 bytes into a `CompactString`, substituting the
/// replacement character for undecodable code units and a trailing odd byte.
#[inline]
pub fn from_utf16be_lossy(v: impl AsRef<[u8]>) -> Self {
CompactString::from_utf16x_lossy(v.as_ref(), u16::from_be, u16::from_be_bytes)
}
/// Consumes the `CompactString` and converts it into a `String` via
/// `Repr::into_string`.
pub fn into_string(self) -> String {
self.0.into_string()
}
/// Converts a `String` into a `CompactString` through `Repr::from_string`,
/// passing `false` as the second argument (the `From<String>` impl passes
/// `true`).
///
/// NOTE(review): the flag's meaning lives in `Repr::from_string`; given this
/// method's name it presumably keeps the existing heap buffer instead of
/// inlining short strings — confirm.
#[inline]
pub fn from_string_buffer(s: String) -> Self {
    CompactString(Repr::from_string(s, false))
}
}
/// The default `CompactString` is built from the empty string.
impl Default for CompactString {
#[inline]
fn default() -> Self {
CompactString::new("")
}
}
/// Dereferences to `str`, so every `&str` method is available on a
/// `CompactString`.
impl Deref for CompactString {
type Target = str;
#[inline]
fn deref(&self) -> &str {
self.as_str()
}
}
/// Mutable dereference to `str`, backed by `as_mut_str`.
impl DerefMut for CompactString {
#[inline]
fn deref_mut(&mut self) -> &mut str {
self.as_mut_str()
}
}
/// Cheap view of the contents as `&str`.
impl AsRef<str> for CompactString {
#[inline]
fn as_ref(&self) -> &str {
self.as_str()
}
}
/// Cheap view of the contents as `&OsStr`, built from the string slice.
impl AsRef<OsStr> for CompactString {
#[inline]
fn as_ref(&self) -> &OsStr {
OsStr::new(self.as_str())
}
}
/// Cheap view of the contents as bytes (same as `as_bytes`).
impl AsRef<[u8]> for CompactString {
#[inline]
fn as_ref(&self) -> &[u8] {
self.as_bytes()
}
}
/// Borrows as `str`. `Hash`, `Eq` and `Ord` in this file all delegate to the
/// string slice, which keeps them consistent with `str` as `Borrow` expects.
impl Borrow<str> for CompactString {
#[inline]
fn borrow(&self) -> &str {
self.as_str()
}
}
/// Mutably borrows as `str`, backed by `as_mut_str`.
impl BorrowMut<str> for CompactString {
#[inline]
fn borrow_mut(&mut self) -> &mut str {
self.as_mut_str()
}
}
// Equality is string-slice equality (see the `PartialEq` impl below), which is
// a full equivalence relation.
impl Eq for CompactString {}
/// Compares a `CompactString` with anything viewable as `&str` (including
/// another `CompactString`) by comparing the string slices.
impl<T: AsRef<str>> PartialEq<T> for CompactString {
fn eq(&self, other: &T) -> bool {
self.as_str() == other.as_ref()
}
}
/// Allows `String == CompactString`, comparing the string slices.
impl PartialEq<CompactString> for String {
fn eq(&self, other: &CompactString) -> bool {
self.as_str() == other.as_str()
}
}
/// Allows `&str == CompactString`, comparing the string slices.
impl PartialEq<CompactString> for &str {
fn eq(&self, other: &CompactString) -> bool {
*self == other.as_str()
}
}
/// Allows `Cow<str> == CompactString`, comparing against the string slice.
impl<'a> PartialEq<CompactString> for Cow<'a, str> {
fn eq(&self, other: &CompactString) -> bool {
*self == other.as_str()
}
}
/// Orders `CompactString`s exactly as their string slices are ordered.
impl Ord for CompactString {
fn cmp(&self, other: &Self) -> Ordering {
self.as_str().cmp(other.as_str())
}
}
/// Partial ordering that always agrees with the total order from `Ord`.
impl PartialOrd for CompactString {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
/// Hashes the string slice, so a `CompactString` hashes identically to the
/// equivalent `str`.
impl Hash for CompactString {
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_str().hash(state)
}
}
/// Builds a `CompactString` from a string slice via `Repr::new`.
impl<'a> From<&'a str> for CompactString {
    fn from(s: &'a str) -> Self {
        CompactString(Repr::new(s))
    }
}
/// Converts an owned `String` through `Repr::from_string`, passing `true` as
/// the second argument (`from_string_buffer` passes `false`).
///
/// NOTE(review): the flag's meaning lives in `Repr::from_string` — presumably
/// it permits inlining short strings; confirm.
impl From<String> for CompactString {
    fn from(s: String) -> Self {
        CompactString(Repr::from_string(s, true))
    }
}
/// Builds a `CompactString` from a borrowed `String` by viewing it as text;
/// the `String` itself is left untouched.
impl<'a> From<&'a String> for CompactString {
fn from(s: &'a String) -> Self {
CompactString::new(s)
}
}
/// Converts a `Cow<str>`, using the `&str` conversion for borrowed data and
/// the `String` conversion for owned data.
impl<'a> From<Cow<'a, str>> for CompactString {
    fn from(cow: Cow<'a, str>) -> Self {
        match cow {
            Cow::Borrowed(borrowed) => Self::from(borrowed),
            Cow::Owned(owned) => Self::from(owned),
        }
    }
}
/// Converts a boxed string slice by first turning it into a `String` and then
/// handing it to `Repr::from_string` with the flag set to `true`, the same
/// path as `From<String>`.
impl From<Box<str>> for CompactString {
    fn from(b: Box<str>) -> Self {
        CompactString(Repr::from_string(b.into_string(), true))
    }
}
/// Converts a `CompactString` into a `String` via `into_string`.
impl From<CompactString> for String {
#[inline]
fn from(s: CompactString) -> Self {
s.into_string()
}
}
/// Converts an owned `CompactString` into `Cow::Owned`, going through `String`.
impl From<CompactString> for Cow<'_, str> {
#[inline]
fn from(s: CompactString) -> Self {
Self::Owned(s.into_string())
}
}
/// Wraps a borrowed `CompactString` as `Cow::Borrowed` without copying; the
/// reference deref-coerces to `&str`.
impl<'a> From<&'a CompactString> for Cow<'a, str> {
#[inline]
fn from(s: &'a CompactString) -> Self {
Self::Borrowed(s)
}
}
/// Parsing a `CompactString` from a `&str` cannot fail, so the error type is
/// `Infallible`.
impl FromStr for CompactString {
type Err = core::convert::Infallible;
fn from_str(s: &str) -> Result<CompactString, Self::Err> {
Ok(CompactString::from(s))
}
}
/// Formats exactly like the `Debug` output of the equivalent `str`.
impl fmt::Debug for CompactString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.as_str(), f)
}
}
/// Formats exactly like the `Display` output of the equivalent `str`.
impl fmt::Display for CompactString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.as_str(), f)
}
}
impl FromIterator<char> for CompactString {
fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
let repr = iter.into_iter().collect();
CompactString(repr)
}
}
impl<'a> FromIterator<&'a char> for CompactString {
fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
let repr = iter.into_iter().collect();
CompactString(repr)
}
}
impl<'a> FromIterator<&'a str> for CompactString {
fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
let repr = iter.into_iter().collect();
CompactString(repr)
}
}
impl FromIterator<Box<str>> for CompactString {
fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
let repr = iter.into_iter().collect();
CompactString(repr)
}
}
impl<'a> FromIterator<Cow<'a, str>> for CompactString {
fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
let repr = iter.into_iter().collect();
CompactString(repr)
}
}
impl FromIterator<String> for CompactString {
fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
let repr = iter.into_iter().collect();
CompactString(repr)
}
}
impl FromIterator<CompactString> for CompactString {
fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
let repr = iter.into_iter().collect();
CompactString(repr)
}
}
/// Concatenates an iterator of `CompactString`s into a `String`.
///
/// The first item is converted with `into_string` and becomes the buffer that
/// the remaining items are appended to; an empty iterator yields an empty
/// `String`.
impl FromIterator<CompactString> for String {
    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
        let mut pieces = iter.into_iter();
        pieces.next().map_or_else(String::new, |first| {
            let mut joined = first.into_string();
            joined.extend(pieces);
            joined
        })
    }
}
/// Concatenates an iterator of `CompactString`s into a `String` and converts
/// that into a `Cow`.
impl FromIterator<CompactString> for Cow<'_, str> {
fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
String::from_iter(iter).into()
}
}
/// Appends every `char` from the iterator, delegating to `Repr`'s `Extend`.
impl Extend<char> for CompactString {
fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
self.0.extend(iter)
}
}
/// Appends every referenced `char` from the iterator, delegating to `Repr`'s
/// `Extend`.
impl<'a> Extend<&'a char> for CompactString {
fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
self.0.extend(iter)
}
}
/// Appends every string slice from the iterator, delegating to `Repr`'s
/// `Extend`.
impl<'a> Extend<&'a str> for CompactString {
fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
self.0.extend(iter)
}
}
/// Appends every boxed string slice from the iterator, delegating to `Repr`'s
/// `Extend`.
impl Extend<Box<str>> for CompactString {
fn extend<T: IntoIterator<Item = Box<str>>>(&mut self, iter: T) {
self.0.extend(iter)
}
}
/// Appends every `Cow<str>` from the iterator, one `push_str` per item.
impl<'a> Extend<Cow<'a, str>> for CompactString {
    fn extend<T: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: T) {
        for piece in iter {
            self.push_str(&piece);
        }
    }
}
/// Appends every `String` from the iterator, delegating to `Repr`'s `Extend`.
impl Extend<String> for CompactString {
fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
self.0.extend(iter)
}
}
impl Extend<CompactString> for String {
fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
for s in iter {
self.push_str(&s);
}
}
}
/// Appends every `CompactString` from the iterator, one `push_str` per item.
impl Extend<CompactString> for CompactString {
    fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
        iter.into_iter().for_each(|piece| self.push_str(&piece));
    }
}
/// Appends every `CompactString` to a `Cow<str>`. `to_mut` turns a borrowed
/// `Cow` into an owned `String` first, then the `String` impl does the work.
impl<'a> Extend<CompactString> for Cow<'a, str> {
fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
self.to_mut().extend(iter);
}
}
/// Lets `write!`/`writeln!` append formatted text to a `CompactString`.
impl fmt::Write for CompactString {
    /// Appends `s`; never fails.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.push_str(s);
        Ok(())
    }

    /// Appends the formatted arguments. When they consist of a single string
    /// with nothing to interpolate, it is pushed directly without going
    /// through the formatting machinery.
    fn write_fmt(mut self: &mut Self, args: fmt::Arguments<'_>) -> fmt::Result {
        if let Some(literal) = args.as_str() {
            self.push_str(literal);
            Ok(())
        } else {
            fmt::write(&mut self, args)
        }
    }
}
/// `compact + "text"`: consumes the left-hand side, appends `rhs`, and returns
/// the same value.
impl Add<&str> for CompactString {
type Output = Self;
fn add(mut self, rhs: &str) -> Self::Output {
self.push_str(rhs);
self
}
}
/// `compact += "text"`: appends `rhs` in place.
impl AddAssign<&str> for CompactString {
fn add_assign(&mut self, rhs: &str) {
self.push_str(rhs);
}
}
/// Error returned when UTF-16 input cannot be converted into a `CompactString`.
///
/// Produced by the `from_utf16*` constructors when a code unit fails to
/// decode, and by the byte-slice variants when the input has an odd number of
/// bytes. The private unit field keeps the type from being constructed outside
/// this module.
#[derive(Copy, Clone, Debug)]
pub struct Utf16Error(());

impl fmt::Display for Utf16Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
    }
}

/// Makes the error usable as `Box<dyn Error>` and with `?` in functions that
/// return such errors. There is no underlying source error.
impl std::error::Error for Utf16Error {}
/// Draining iterator created by `CompactString::drain`.
///
/// Yields the characters of the drained range; the range itself is removed
/// from the string when this value is dropped.
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Drain<'a> {
// Raw pointer back to the string being drained; used by `Drop` to remove the range.
compact_string: *mut CompactString,
// Byte offset where the drained range starts.
start: usize,
// Byte offset one past the end of the drained range.
end: usize,
// Iterator over the not-yet-yielded characters of the range.
chars: std::str::Chars<'a>,
}
// The raw pointer field suppresses the automatic `Send`/`Sync` impls, so they
// are restored by hand.
// NOTE(review): soundness rests on `Drain` only being created from an
// exclusive `&mut CompactString` borrow (see `CompactString::drain`) — confirm.
unsafe impl Send for Drain<'_> {}
unsafe impl Sync for Drain<'_> {}
/// Debug output is a `Drain(..)` tuple showing the characters not yet yielded.
impl fmt::Debug for Drain<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("Drain").field(&self.as_str()).finish()
}
}
/// Displays the characters that have not been yielded yet.
impl fmt::Display for Drain<'_> {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}
/// Dropping the iterator removes the whole `start..end` range from the
/// string, regardless of how many characters were actually yielded.
impl Drop for Drain<'_> {
#[inline]
fn drop(&mut self) {
// SAFETY: `start..end` was validated by `ensure_range` when the `Drain` was
// created, and replacing with the empty string can only shrink the contents.
// NOTE(review): relies on the pointer still being valid and exclusively
// borrowed for the lifetime of the `Drain` — confirm.
unsafe { (*self.compact_string).replace_range_shrink(self.start, self.end, "") };
}
}
impl Drain<'_> {
/// Returns the part of the drained range that has not been yielded yet.
#[inline]
pub fn as_str(&self) -> &str {
self.chars.as_str()
}
}
/// Dereferences to the not-yet-yielded part of the drained range.
impl Deref for Drain<'_> {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
/// Yields the characters of the drained range front to back, forwarding to the
/// inner `Chars` iterator.
impl Iterator for Drain<'_> {
type Item = char;
#[inline]
fn next(&mut self) -> Option<char> {
self.chars.next()
}
// Counts on a clone of the inner iterator because `Drain` implements `Drop`
// and its fields cannot be moved out; `self` is dropped on return, which
// removes the range.
#[inline]
fn count(self) -> usize {
self.chars.clone().count()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.chars.size_hint()
}
// The last remaining character is simply the first one from the back.
#[inline]
fn last(mut self) -> Option<char> {
self.chars.next_back()
}
}
/// Allows consuming the drained characters from the back as well.
impl DoubleEndedIterator for Drain<'_> {
#[inline]
fn next_back(&mut self) -> Option<char> {
self.chars.next_back()
}
}
// Marks `Drain` as fused; `next` forwards directly to the inner `Chars` iterator.
impl FusedIterator for Drain<'_> {}
// Compile-time check that `CompactString` is exactly as large as `String`.
static_assertions::assert_eq_size!(CompactString, String);