#![allow(rustdoc::bare_urls)]
#![doc = include_str!("../README.md")]
#![allow(unstable_name_collisions)]
#![no_std]
extern crate alloc;
#[rustversion::before(1.84)]
use sptr::Strict;
use alloc::{
alloc::{alloc, dealloc, Layout},
borrow::{Cow, ToOwned},
boxed::Box,
str::Utf8Error,
string::String,
};
use core::{
cmp::Ordering,
fmt,
hash::{Hash, Hasher},
iter::FromIterator,
mem,
ops::Deref,
ptr, slice, str,
};
mod vint;
use crate::vint::VarInt;
#[cfg(feature = "rkyv")]
mod rkyv;
/// Alignment, in bytes, requested for every heap buffer allocated by `ColdString::new_heap`
/// (and passed again to `dealloc` when the string is dropped).
const HEAP_ALIGN: usize = 4;
/// Size of a `usize` in bytes. Strings of at most this many bytes are stored inline.
const WIDTH: usize = mem::size_of::<usize>();
/// A UTF-8 string stored in a single pointer-sized word with an alignment of one byte.
///
/// Strings of at most `size_of::<usize>()` bytes are encoded directly inside the word;
/// longer strings live in a heap buffer whose tagged address is kept in the word.
#[repr(packed)]
pub struct ColdString {
// Either the inline string bytes (reinterpreted as an address) or a tagged heap address.
// The struct is packed, so this field may be unaligned: it is read through
// `ptr::addr_of!` + `ptr::read_unaligned` instead of through a reference.
encoded: *const u8,
}
impl ColdString {
// All masks below are built with `from_ne_bytes(x.to_le_bytes())`, which places the
// pattern in the FIRST in-memory byte of the word regardless of target endianness.

/// Mask selecting the two most significant bits of the first in-memory byte.
const TAG_MASK: usize = usize::from_ne_bytes(0b11000000usize.to_le_bytes());
/// First-byte pattern `0b11111xxx` marking an inline string shorter than `WIDTH`;
/// the three low bits of that byte carry the length (see `LEN_MASK`).
const INLINE_TAG: usize = usize::from_ne_bytes(0b11111000usize.to_le_bytes());
/// Value of the `TAG_MASK` bits (`0b10`) that marks the word as a tagged heap pointer.
const PTR_TAG: usize = usize::from_ne_bytes(0b10000000usize.to_le_bytes());
/// Mask selecting the length bits in the first in-memory byte of a short inline string.
const LEN_MASK: usize = usize::from_ne_bytes(0b111usize.to_le_bytes());
/// Number of bits to rotate right so that the first in-memory byte of a `usize` lands in
/// its least significant byte: zero on little-endian targets, `8 * (WIDTH - 1)` otherwise.
const ROT: u32 = if cfg!(target_endian = "little") {
0
} else {
8 * (WIDTH - 1) as u32
};
/// Builds a `ColdString` from a byte sequence after validating it as UTF-8.
///
/// # Errors
///
/// Returns the `Utf8Error` produced by `str::from_utf8` when `v` is not valid UTF-8.
pub fn from_utf8<B: AsRef<[u8]>>(v: B) -> Result<Self, Utf8Error> {
    str::from_utf8(v.as_ref()).map(Self::new)
}
/// Builds a `ColdString` from a byte sequence without checking that it is UTF-8.
///
/// # Safety
///
/// The bytes in `v` must be valid UTF-8, as required by `str::from_utf8_unchecked`.
pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(v: B) -> Self {
    let bytes = v.as_ref();
    Self::new(str::from_utf8_unchecked(bytes))
}
/// Creates a `ColdString` holding a copy of `x`.
///
/// Strings of at most `WIDTH` bytes are stored inline; anything longer is copied to the heap.
pub fn new<T: AsRef<str>>(x: T) -> Self {
    let text = x.as_ref();
    if text.len() > WIDTH {
        return Self::new_heap(text);
    }
    Self::new_inline(text)
}
/// Returns a zeroed word-sized buffer prepared for an inline string of `s.len()` bytes.
///
/// When the string is shorter than `WIDTH`, byte 0 receives the inline tag combined with
/// the length; a string of exactly `WIDTH` bytes needs no tag byte. The string bytes
/// themselves are NOT copied here.
#[inline]
const fn inline_buf(s: &str) -> [u8; WIDTH] {
    let len = s.len();
    debug_assert!(len <= WIDTH);
    let mut bytes = [0u8; WIDTH];
    if len < WIDTH {
        // Combine tag and length in "first byte" position, then bring that byte down to
        // the least significant position so it can be truncated to `u8`.
        let tagged = Self::INLINE_TAG | len.rotate_left(Self::ROT);
        bytes[0] = tagged.rotate_right(Self::ROT) as u8;
    }
    bytes
}
/// Wraps an already encoded inline buffer, reinterpreting its native-endian bytes as the
/// address of a null-based pointer.
#[rustversion::attr(since(1.61), const)]
#[inline]
fn from_inline_buf(b: [u8; WIDTH]) -> Self {
    let bits = usize::from_ne_bytes(b);
    Self {
        encoded: ptr::null_mut::<u8>().wrapping_add(bits),
    }
}
/// Offset of the first string byte inside the inline word for a string of `l` bytes:
/// `1` when a tag byte is present (`l < WIDTH`), `0` otherwise.
#[inline]
const fn utf8_start(l: usize) -> usize {
    if l < WIDTH {
        1
    } else {
        0
    }
}
/// Encodes `s` (at most `WIDTH` bytes) directly into the word.
#[inline]
fn new_inline(s: &str) -> Self {
    let bytes = s.as_bytes();
    let offset = Self::utf8_start(bytes.len());
    let mut word = Self::inline_buf(s);
    // The payload follows the optional tag byte.
    word[offset..offset + bytes.len()].copy_from_slice(bytes);
    Self::from_inline_buf(word)
}
/// Builds an inline `ColdString` in a `const` context.
///
/// # Panics
///
/// Panics if `s` is longer than `core::mem::size_of::<usize>()` bytes. A string of exactly
/// that many bytes is accepted.
#[rustversion::since(1.61)]
#[inline]
pub const fn new_inline_const(s: &str) -> Self {
    if s.len() > WIDTH {
        panic!(
            "Length for `new_inline_const` must be less than or equal to `core::mem::size_of::<usize>()`."
        );
    }
    let mut buf = Self::inline_buf(s);
    let start = Self::utf8_start(s.len());
    // `for` loops and `copy_from_slice` are not usable in this `const fn`, so the payload
    // is copied byte by byte after the optional tag byte.
    let src = s.as_bytes();
    let mut i = 0;
    while i < src.len() {
        buf[i + start] = src[i];
        i += 1;
    }
    Self::from_inline_buf(buf)
}
/// Reads the encoded word out of the packed (possibly unaligned) field.
///
/// # Safety
///
/// NOTE(review): the body only performs an unaligned read through a valid `&self`;
/// confirm whether any additional caller-side precondition is intended.
#[rustversion::attr(since(1.71), const)]
#[inline]
unsafe fn ptr(&self) -> *const u8 {
    let field = ptr::addr_of!(self.encoded);
    ptr::read_unaligned(field)
}
/// Returns the encoded word as an integer.
#[inline]
fn addr(&self) -> usize {
    // SAFETY: `ptr` only performs an unaligned read of the field behind a valid `&self`.
    let raw = unsafe { self.ptr() };
    raw.addr()
}
/// Extracts the bits of the encoded word selected by `TAG_MASK`.
#[inline]
fn tag(&self) -> usize {
    let word = self.addr();
    word & Self::TAG_MASK
}
/// Returns `true` when the string bytes live inside the word itself rather than on the heap.
#[inline]
pub fn is_inline(&self) -> bool {
    let on_heap = self.tag() == Self::PTR_TAG;
    !on_heap
}
/// Copies `s` into a fresh heap buffer and returns a `ColdString` holding its tagged address.
///
/// The buffer holds the length as written by `VarInt::write`, immediately followed by the
/// string bytes. Aborts through `handle_alloc_error` if the allocation fails.
#[inline]
fn new_heap(s: &str) -> Self {
let len = s.len();
// Encode the length as a variable-length header.
let (vint_len, len_buf) = VarInt::write(len as u64);
let total = vint_len + len;
let layout = Layout::from_size_align(total, HEAP_ALIGN).unwrap();
unsafe {
let ptr = alloc(layout);
if ptr.is_null() {
alloc::alloc::handle_alloc_error(layout);
}
// Header first, then the string payload right behind it.
ptr::copy_nonoverlapping(len_buf.as_ptr(), ptr, vint_len);
ptr::copy_nonoverlapping(s.as_ptr(), ptr.add(vint_len), len);
let encoded = ptr.map_addr(|addr| {
debug_assert!(addr % HEAP_ALIGN == 0);
// The two low address bits are zero thanks to `HEAP_ALIGN`; the rotation moves
// them into the two top bits of the first in-memory byte, where the tag lives.
let mut addr = addr.rotate_left(6 + Self::ROT);
// Set the top tag bit so those two bits read `0b10` (heap pointer).
addr |= Self::PTR_TAG;
addr
});
Self { encoded }
}
}
/// Recovers the real heap address from the tagged word.
///
/// Only meaningful for heap-backed strings; this is checked with a `debug_assert!`.
#[inline]
fn heap_ptr(&self) -> *const u8 {
debug_assert!(!self.is_inline());
unsafe {
self.ptr().map_addr(|mut addr| {
// Clear the tag bit that `new_heap` OR-ed in.
addr ^= Self::PTR_TAG;
// Undo the rotation applied by `new_heap`.
let addr = addr.rotate_right(6 + Self::ROT);
debug_assert!(addr % HEAP_ALIGN == 0);
addr
})
}
}
/// Length in bytes of an inline string.
///
/// If the first byte carries the full inline tag, the length is read from its low bits;
/// otherwise the word holds exactly `WIDTH` string bytes.
#[inline]
fn inline_len(&self) -> usize {
    let word = self.addr();
    if (word & Self::INLINE_TAG) == Self::INLINE_TAG {
        (word & Self::LEN_MASK).rotate_right(Self::ROT)
    } else {
        WIDTH
    }
}
/// Returns the length of the string in bytes.
#[inline]
pub fn len(&self) -> usize {
    if self.is_inline() {
        return self.inline_len();
    }
    // SAFETY: the string is heap-backed, so `heap_ptr` points at the length header
    // written by `new_heap`.
    let (len, _header) = unsafe { VarInt::read(self.heap_ptr()) };
    len as usize
}
/// Borrows the string bytes stored inside the word itself.
///
/// # Safety
///
/// `self` must be an inline string (`is_inline()` returns `true`).
#[allow(unsafe_op_in_unsafe_fn)]
#[inline]
unsafe fn decode_inline(&self) -> &[u8] {
    let len = self.inline_len();
    // Skip the tag byte when there is one.
    let skip = Self::utf8_start(len);
    let base = ptr::addr_of!(self.encoded).cast::<u8>();
    slice::from_raw_parts(base.add(skip), len)
}
/// Borrows the string bytes stored in the heap buffer, just past the length header.
///
/// # Safety
///
/// `self` must be a heap-backed string (`is_inline()` returns `false`).
#[allow(unsafe_op_in_unsafe_fn)]
#[inline]
unsafe fn decode_heap(&self) -> &[u8] {
    let base = self.heap_ptr();
    let (len, header) = VarInt::read(base);
    slice::from_raw_parts(base.add(header), len)
}
/// Returns the string contents as a byte slice.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
    if self.is_inline() {
        // SAFETY: the inline representation was just confirmed.
        unsafe { self.decode_inline() }
    } else {
        // SAFETY: the heap representation was just confirmed.
        unsafe { self.decode_heap() }
    }
}
/// Returns the string contents as a `&str`.
#[inline]
pub fn as_str(&self) -> &str {
    let bytes = self.as_bytes();
    // SAFETY: every constructor stores bytes that came from a `&str`, or bytes the caller
    // of `from_utf8_unchecked` promised to be valid UTF-8.
    unsafe { str::from_utf8_unchecked(bytes) }
}
/// Returns `true` when the string has a length of zero bytes.
#[inline]
pub fn is_empty(&self) -> bool {
    let byte_len = self.len();
    byte_len == 0
}
}
impl Default for ColdString {
fn default() -> Self {
Self::new_inline("")
}
}
impl Deref for ColdString {
type Target = str;
fn deref(&self) -> &str {
self.as_str()
}
}
impl Drop for ColdString {
    /// Frees the heap buffer of a heap-backed string; inline strings own nothing.
    fn drop(&mut self) {
        if self.is_inline() {
            return;
        }
        // SAFETY: the string is heap-backed, so `heap_ptr` is the pointer returned by
        // `alloc` in `new_heap`; the layout is rebuilt from the same header + payload
        // size and the same alignment that were used for the allocation.
        unsafe {
            let base = self.heap_ptr();
            let (len, header) = VarInt::read(base);
            let layout = Layout::from_size_align(header + len, HEAP_ALIGN).unwrap();
            dealloc(base as *mut u8, layout);
        }
    }
}
impl Clone for ColdString {
    /// Copies the word for inline strings; heap strings get a fresh heap buffer.
    fn clone(&self) -> Self {
        if self.is_inline() {
            // SAFETY: an inline word is self-contained, so duplicating it is a full copy.
            let encoded = unsafe { self.ptr() };
            Self { encoded }
        } else {
            Self::new_heap(self.as_str())
        }
    }
}
impl PartialEq for ColdString {
    /// Two inline strings are equal when their encoded words match; two heap strings are
    /// equal when their bytes match; an inline and a heap string are never equal.
    fn eq(&self, other: &Self) -> bool {
        let inline = self.is_inline();
        if inline != other.is_inline() {
            return false;
        }
        if inline {
            // SAFETY: `ptr` only reads the encoded word of each operand.
            unsafe { self.ptr() == other.ptr() }
        } else {
            // SAFETY: both operands were just confirmed to be heap-backed.
            unsafe { self.decode_heap() == other.decode_heap() }
        }
    }
}
// Marker impl: declares that `PartialEq for ColdString` is a full equivalence relation.
impl Eq for ColdString {}
impl Hash for ColdString {
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_str().hash(state)
}
}
impl fmt::Debug for ColdString {
    /// Formats like the `Debug` output of the underlying `str`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Debug>::fmt(self.as_str(), f)
    }
}
impl fmt::Display for ColdString {
    /// Formats like the `Display` output of the underlying `str`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt(self.as_str(), f)
    }
}
impl From<&str> for ColdString {
fn from(s: &str) -> Self {
Self::new(s)
}
}
impl From<String> for ColdString {
    /// Copies the contents of `s` into a new `ColdString`; the `String` is dropped afterwards.
    fn from(s: String) -> Self {
        Self::new(s.as_str())
    }
}
impl From<ColdString> for String {
    /// Copies the contents of `s` into a newly allocated `String`.
    fn from(s: ColdString) -> Self {
        String::from(s.as_str())
    }
}
impl From<ColdString> for Cow<'_, str> {
#[inline]
fn from(s: ColdString) -> Self {
Self::Owned(s.into())
}
}
impl<'a> From<&'a ColdString> for Cow<'a, str> {
#[inline]
fn from(s: &'a ColdString) -> Self {
Self::Borrowed(s)
}
}
impl<'a> From<Cow<'a, str>> for ColdString {
fn from(cow: Cow<'a, str>) -> Self {
match cow {
Cow::Borrowed(s) => s.into(),
Cow::Owned(s) => s.into(),
}
}
}
impl From<Box<str>> for ColdString {
    /// Copies the boxed string into a new `ColdString`; the box is dropped afterwards.
    #[inline]
    #[track_caller]
    fn from(b: Box<str>) -> Self {
        let text: &str = &b;
        Self::new(text)
    }
}
impl FromIterator<char> for ColdString {
fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> Self {
let s: String = iter.into_iter().collect();
ColdString::new(&s)
}
}
// SAFETY: the raw pointer field is either inline data or the address of a heap buffer that
// this value allocated itself (`new_heap`), never shares (`Clone` allocates a fresh copy)
// and frees in `Drop`, so moving the value to another thread is sound.
unsafe impl Send for ColdString {}
// SAFETY: every method taking `&self` in this file only reads the word or the heap buffer;
// there is no interior mutability, so shared references may be used from several threads.
unsafe impl Sync for ColdString {}
impl core::borrow::Borrow<str> for ColdString {
fn borrow(&self) -> &str {
self.as_str()
}
}
impl PartialEq<str> for ColdString {
    /// Compares the stored bytes with the bytes of `other`.
    fn eq(&self, other: &str) -> bool {
        let mine = if self.is_inline() {
            // SAFETY: the inline representation was just confirmed.
            unsafe { self.decode_inline() }
        } else {
            // SAFETY: the heap representation was just confirmed.
            unsafe { self.decode_heap() }
        };
        mine == other.as_bytes()
    }
}
impl PartialEq<ColdString> for str {
    /// Symmetric counterpart of `ColdString == str`.
    fn eq(&self, other: &ColdString) -> bool {
        <ColdString as PartialEq<str>>::eq(other, self)
    }
}
impl PartialEq<&str> for ColdString {
fn eq(&self, other: &&str) -> bool {
self.eq(*other)
}
}
impl PartialEq<ColdString> for &str {
    /// Symmetric counterpart of `ColdString == &str`.
    fn eq(&self, other: &ColdString) -> bool {
        <ColdString as PartialEq<str>>::eq(other, *self)
    }
}
impl AsRef<str> for ColdString {
#[inline]
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl AsRef<[u8]> for ColdString {
#[inline]
fn as_ref(&self) -> &[u8] {
self.as_bytes()
}
}
impl Ord for ColdString {
    /// Orders exactly like the underlying `str` values.
    fn cmp(&self, other: &Self) -> Ordering {
        let (lhs, rhs) = (self.as_str(), other.as_str());
        Ord::cmp(lhs, rhs)
    }
}
impl PartialOrd for ColdString {
    /// Always returns `Some`: the ordering is total and defined by the `Ord` impl, which
    /// compares the underlying `str` values.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Canonical form for types that implement `Ord`: delegating keeps the two impls
        // from ever diverging.
        Some(Ord::cmp(self, other))
    }
}
impl alloc::str::FromStr for ColdString {
    type Err = core::convert::Infallible;

    /// Copies `s` into a new `ColdString`; this conversion never fails.
    fn from_str(s: &str) -> Result<ColdString, Self::Err> {
        let parsed = Self::new(s);
        Ok(parsed)
    }
}
#[cfg(feature = "serde")]
impl serde::Serialize for ColdString {
    /// Serializes the contents as a string.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ColdString {
    /// Deserializes a `String` and copies it into a `ColdString`.
    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
        <String as serde::Deserialize<'de>>::deserialize(d)
            .map(|owned| ColdString::new(owned.as_str()))
    }
}
#[cfg(all(test, feature = "serde"))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// A short (inline) string round-trips through a single `Str` token.
    #[test]
    fn test_serde_cold_string_inline() {
        let text = "ferris";
        let cold = ColdString::new(text);
        assert_tokens(&cold, &[Token::Str(text)]);
    }

    /// A long (heap) string round-trips through a single `Str` token.
    #[test]
    fn test_serde_cold_string_heap() {
        let text = "This is a significantly longer string for heap testing";
        let cold = ColdString::new(text);
        assert_tokens(&cold, &[Token::Str(text)]);
    }
}
#[cfg(test)]
mod tests {
use super::*;
use core::hash::BuildHasher;
use hashbrown::hash_map::DefaultHashBuilder;
/// `ColdString` is exactly one word wide and byte-aligned, so it adds no padding to a
/// struct that contains it.
#[test]
fn test_layout() {
    let word = mem::size_of::<usize>();
    assert_eq!(mem::size_of::<ColdString>(), word);
    assert_eq!(mem::align_of::<ColdString>(), 1);

    struct Wrapper {
        _s: ColdString,
        _b: u8,
    }
    assert_eq!(mem::size_of::<Wrapper>(), word + 1);
    assert_eq!(mem::align_of::<Wrapper>(), 1);
}
/// The default value is the empty string.
#[test]
fn test_default() {
    let empty = ColdString::default();
    assert!(empty.is_empty());
    assert_eq!(empty.len(), 0);
    assert_eq!(empty, "");
    assert_eq!(empty, ColdString::new(""));
}
fn assert_correct(s: &str) {
let cs = ColdString::new(s);
assert_eq!(s.len() <= mem::size_of::<usize>(), cs.is_inline());
assert_eq!(cs.len(), s.len());
assert_eq!(cs.as_bytes(), s.as_bytes());
assert_eq!(cs.as_str(), s);
assert_eq!(cs.clone(), cs);
let bh = DefaultHashBuilder::new();
let mut hasher1 = bh.build_hasher();
cs.hash(&mut hasher1);
let mut hasher2 = bh.build_hasher();
cs.clone().hash(&mut hasher2);
assert_eq!(hasher1.finish(), hasher2.finish());
assert_eq!(cs, s);
assert_eq!(s, cs);
assert_eq!(cs, *s);
assert_eq!(*s, cs);
}
/// Runs `assert_correct` over a fixed table of strings: ASCII strings of one to nine
/// bytes, multi-byte UTF-8 sequences, the empty string and strings made only of NUL bytes.
#[test]
fn it_works() {
for s in [
"1",
"12",
"123",
"1234",
"12345",
"123456",
"1234567",
"12345678",
"123456789",
str::from_utf8(&[240, 159, 146, 150]).unwrap(),
"✅",
"❤️",
"🦀💯",
"🦀",
"💯",
"abcd",
"test",
"",
"\0",
"\0\0",
"\0\0\0",
"\0\0\0\0",
"\0\0\0\0\0\0\0",
"\0\0\0\0\0\0\0\0",
"1234567",
"12345678",
"longer test",
str::from_utf8(&[103, 39, 240, 145, 167, 156, 194, 165]).unwrap(),
"AaAa0 ® ",
str::from_utf8(&[240, 158, 186, 128, 240, 145, 143, 151]).unwrap(),
] {
assert_correct(s);
}
}
/// Returns some character whose UTF-8 encoding starts with byte `b`, or `None` when no
/// valid UTF-8 sequence can start with that byte.
fn char_from_leading_byte(b: u8) -> Option<char> {
    // Continuation bytes that complete a valid sequence for each class of leading byte.
    let tail: &[u8] = match b {
        0x00..=0x7F => return Some(b as char),
        0xC2..=0xDF => &[0x91],
        0xE0 => &[0xA0, 0x91],
        0xE1..=0xEC | 0xEE..=0xEF => &[0x91, 0xA5],
        0xED => &[0x80, 0x91],
        0xF0 => &[0x90, 0x91, 0xA5],
        0xF1..=0xF3 => &[0x91, 0xA5, 0x82],
        0xF4 => &[0x80, 0x91, 0x82],
        _ => return None,
    };
    let mut encoded = [0u8; 4];
    encoded[0] = b;
    encoded[1..=tail.len()].copy_from_slice(tail);
    str::from_utf8(&encoded[..=tail.len()])
        .unwrap()
        .chars()
        .next()
}
/// Exercises strings whose length sits right at the inline/heap boundary, once for every
/// byte that can start a UTF-8 sequence.
#[test]
fn test_edges() {
    let word = mem::size_of::<usize>();
    for target_len in [word - 1, word, word + 1] {
        for lead in 0u8..=255 {
            if let Some(head) = char_from_leading_byte(lead) {
                let mut text = String::with_capacity(target_len);
                text.push(head);
                // Pad with digits counting down the bytes still missing.
                while text.len() < target_len {
                    let remaining = (target_len - text.len()) as u32;
                    text.push(core::char::from_digit(remaining, 10).unwrap());
                }
                assert_correct(&text);
            }
        }
    }
}
/// Writes a `ColdString` at every byte offset from 0 to 7 of a buffer and reads it back,
/// checking that the value survives being stored at unaligned addresses.
#[test]
fn test_unaligned_placement() {
    for text in ["torture", "tor", "tortures", "tort", "torture torture"] {
        let mut scratch = [0u8; 32];
        for shift in 0..8 {
            unsafe {
                let slot = scratch.as_mut_ptr().add(shift) as *mut ColdString;
                ptr::write_unaligned(slot, ColdString::new(text));
                // Reading the value back takes ownership again, so it is dropped here.
                let roundtrip = ptr::read_unaligned(slot);
                assert_eq!(roundtrip.as_str(), text);
            }
        }
    }
}
}