#![allow(unsafe_code)]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::sync::Arc;
/// Maximum number of bytes a `SmolStr` keeps in its inline buffer.
///
/// Strings whose UTF-8 length is at most this value are stored in place;
/// longer strings are stored behind an `Arc<str>`.
pub const SMOL_STR_INLINE_CAP: usize = 22;
/// An immutable string that stores short contents inline and longer
/// contents in a shared, reference-counted allocation.
///
/// Cloning copies the inline buffer or bumps the `Arc` reference count; the
/// string bytes of a heap-backed value are not duplicated.
#[derive(Clone)]
pub struct SmolStr {
// Private storage; all access goes through the methods on `SmolStr`.
repr: SmolStrRepr,
}
/// Internal storage of a `SmolStr`.
#[derive(Clone)]
enum SmolStrRepr {
/// Short string held in place. The first `len` bytes of `data` are the
/// contents; `SmolStr::as_str` reads them without re-validating UTF-8.
Inline {
/// Number of bytes of `data` that are in use.
len: u8,
/// Fixed-size byte buffer; only `data[..len]` is ever exposed.
data: [u8; SMOL_STR_INLINE_CAP],
},
/// Longer string held in a reference-counted allocation that clones share.
Heap(Arc<str>),
}
impl SmolStr {
    /// Returns an empty string in the inline representation.
    ///
    /// This is a `const fn`, so it can be used to initialise constants.
    #[inline]
    pub const fn new_empty() -> Self {
        Self {
            repr: SmolStrRepr::Inline {
                len: 0,
                data: [0; SMOL_STR_INLINE_CAP],
            },
        }
    }

    /// Builds the inline representation from a pre-filled buffer.
    ///
    /// Callers must pass a `len` no larger than `SMOL_STR_INLINE_CAP` and a
    /// buffer whose first `len` bytes are valid UTF-8, because `as_str`
    /// depends on both. Only the length bound is checked, and only in debug
    /// builds.
    #[inline]
    fn inline(len: u8, data: [u8; SMOL_STR_INLINE_CAP]) -> Self {
        debug_assert!((len as usize) <= SMOL_STR_INLINE_CAP);
        let repr = SmolStrRepr::Inline { len, data };
        Self { repr }
    }

    /// Builds the heap representation around an existing `Arc<str>`.
    #[inline]
    fn heap(arc: Arc<str>) -> Self {
        let repr = SmolStrRepr::Heap(arc);
        Self { repr }
    }

    /// Borrows the contents as a string slice, whichever representation
    /// is in use.
    #[inline]
    pub fn as_str(&self) -> &str {
        match &self.repr {
            SmolStrRepr::Heap(shared) => &**shared,
            SmolStrRepr::Inline { len, data } => {
                let used = &data[..usize::from(*len)];
                // SAFETY: every constructor in this impl fills the inline
                // buffer either by copying bytes out of `&str` values or by
                // validating them with `std::str::from_utf8`, so the first
                // `len` bytes are always valid UTF-8.
                unsafe { std::str::from_utf8_unchecked(used) }
            }
        }
    }

    /// Returns the length of the string in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        match &self.repr {
            SmolStrRepr::Heap(shared) => shared.len(),
            SmolStrRepr::Inline { len, .. } => usize::from(*len),
        }
    }

    /// Returns `true` when the string contains no bytes.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` when the contents live in the inline buffer rather
    /// than behind an `Arc`.
    #[inline]
    pub fn is_inline(&self) -> bool {
        match &self.repr {
            SmolStrRepr::Inline { .. } => true,
            SmolStrRepr::Heap(_) => false,
        }
    }

    /// Returns `true` when every byte of the string is ASCII (vacuously true
    /// for the empty string).
    #[inline]
    pub fn is_ascii(&self) -> bool {
        self.as_str().is_ascii()
    }

    /// Creates a `SmolStr` by copying from a borrowed string slice.
    ///
    /// Contents of at most `SMOL_STR_INLINE_CAP` bytes are stored inline;
    /// anything longer is copied into a new `Arc<str>`.
    #[inline]
    pub fn from_borrowed(s: &str) -> Self {
        let n = s.len();
        if n > SMOL_STR_INLINE_CAP {
            return Self::heap(Arc::from(s));
        }
        let mut data = [0u8; SMOL_STR_INLINE_CAP];
        data[..n].copy_from_slice(s.as_bytes());
        // `n` is at most SMOL_STR_INLINE_CAP here, so the cast cannot truncate.
        Self::inline(n as u8, data)
    }

    /// Creates a `SmolStr` from an owned `String`.
    ///
    /// Short strings are copied into the inline buffer; longer ones are
    /// converted into an `Arc<str>`.
    #[inline]
    pub fn from_string(s: String) -> Self {
        if s.len() > SMOL_STR_INLINE_CAP {
            Self::heap(Arc::from(s))
        } else {
            Self::from_borrowed(&s)
        }
    }

    /// Concatenates `a` and `b` into a single `SmolStr`.
    ///
    /// Equivalent to `concat_many(&[a, b])`: the result is inline when the
    /// combined length fits, heap-backed otherwise.
    #[inline]
    pub fn concat(a: &str, b: &str) -> Self {
        Self::concat_many(&[a, b])
    }

    /// Concatenates all `slices`, in order, into a single `SmolStr`.
    ///
    /// The total length is computed first, so the inline path needs no
    /// allocation and the heap path reserves its buffer once.
    #[inline]
    pub fn concat_many(slices: &[&str]) -> Self {
        let total = slices.iter().map(|part| part.len()).sum::<usize>();
        if total > SMOL_STR_INLINE_CAP {
            let mut joined = String::with_capacity(total);
            slices.iter().for_each(|part| joined.push_str(part));
            return Self::heap(Arc::from(joined));
        }
        let mut data = [0u8; SMOL_STR_INLINE_CAP];
        let mut filled = 0usize;
        for part in slices {
            let end = filled + part.len();
            data[filled..end].copy_from_slice(part.as_bytes());
            filled = end;
        }
        // `total` is at most SMOL_STR_INLINE_CAP here, so the cast cannot truncate.
        Self::inline(total as u8, data)
    }

    /// Converts into an owned `String` by copying the contents.
    #[inline]
    pub fn into_string(self) -> String {
        String::from(self.as_str())
    }

    /// Builds an inline string by letting `write` fill a zero-initialised
    /// buffer of exactly `out_len` bytes.
    ///
    /// Returns `None` when `out_len` exceeds `SMOL_STR_INLINE_CAP` (in which
    /// case `write` is never called) or when the bytes left in the buffer
    /// are not valid UTF-8.
    #[inline]
    pub fn try_build_inline<F>(out_len: usize, write: F) -> Option<Self>
    where
        F: FnOnce(&mut [u8]),
    {
        if out_len > SMOL_STR_INLINE_CAP {
            return None;
        }
        let mut data = [0u8; SMOL_STR_INLINE_CAP];
        write(&mut data[..out_len]);
        // Validate before constructing: `as_str` trusts the inline bytes.
        let is_utf8 = std::str::from_utf8(&data[..out_len]).is_ok();
        if is_utf8 {
            Some(Self::inline(out_len as u8, data))
        } else {
            None
        }
    }
}
impl Default for SmolStr {
#[inline]
fn default() -> Self {
SmolStr::new_empty()
}
}
impl Deref for SmolStr {
type Target = str;
#[inline]
fn deref(&self) -> &str {
self.as_str()
}
}
impl AsRef<str> for SmolStr {
#[inline]
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl Borrow<str> for SmolStr {
#[inline]
fn borrow(&self) -> &str {
self.as_str()
}
}
impl From<&str> for SmolStr {
#[inline]
fn from(s: &str) -> Self {
SmolStr::from_borrowed(s)
}
}
impl std::str::FromStr for SmolStr {
type Err = std::convert::Infallible;
#[inline]
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(SmolStr::from_borrowed(s))
}
}
impl From<String> for SmolStr {
#[inline]
fn from(s: String) -> Self {
SmolStr::from_string(s)
}
}
impl From<&String> for SmolStr {
#[inline]
fn from(s: &String) -> Self {
SmolStr::from_borrowed(s.as_str())
}
}
impl From<SmolStr> for String {
#[inline]
fn from(s: SmolStr) -> Self {
s.into_string()
}
}
impl From<&SmolStr> for String {
    /// Copies the contents of the borrowed `SmolStr` into a new `String`.
    #[inline]
    fn from(s: &SmolStr) -> Self {
        String::from(s.as_str())
    }
}
impl fmt::Debug for SmolStr {
    /// Formats exactly like the `Debug` output of the underlying `str`
    /// (quoted and escaped); the storage representation is not shown.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        <str as fmt::Debug>::fmt(text, f)
    }
}
impl fmt::Display for SmolStr {
    /// Formats exactly like the underlying `str`, honouring any width or
    /// padding options set on the formatter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        <str as fmt::Display>::fmt(text, f)
    }
}
impl PartialEq for SmolStr {
    /// Two values are equal when their contents are equal, regardless of
    /// whether either one is stored inline or on the heap.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.as_str(), other.as_str());
        lhs == rhs
    }
}
// Marker impl: equality is plain string-content equality (see `PartialEq`),
// which is reflexive, so `SmolStr` can promise full `Eq`.
impl Eq for SmolStr {}
impl PartialEq<str> for SmolStr {
    /// Compares the contents against a `str`.
    #[inline]
    fn eq(&self, other: &str) -> bool {
        let lhs: &str = self.as_str();
        lhs == other
    }
}
impl PartialEq<&str> for SmolStr {
    /// Compares the contents against a `&str`, so `smol == "literal"` works.
    #[inline]
    fn eq(&self, other: &&str) -> bool {
        let rhs: &str = other;
        self.as_str() == rhs
    }
}
impl PartialEq<String> for SmolStr {
#[inline]
fn eq(&self, other: &String) -> bool {
self.as_str() == other.as_str()
}
}
impl PartialEq<SmolStr> for str {
    /// Symmetric counterpart of `SmolStr == str`.
    #[inline]
    fn eq(&self, other: &SmolStr) -> bool {
        let rhs: &str = other.as_str();
        self == rhs
    }
}
impl PartialEq<SmolStr> for &str {
    /// Symmetric counterpart of `SmolStr == &str`, so `"literal" == smol`
    /// works.
    #[inline]
    fn eq(&self, other: &SmolStr) -> bool {
        let lhs: &str = self;
        lhs == other.as_str()
    }
}
impl PartialEq<SmolStr> for String {
    /// Symmetric counterpart of `SmolStr == String`.
    #[inline]
    fn eq(&self, other: &SmolStr) -> bool {
        let (lhs, rhs) = (self.as_str(), other.as_str());
        lhs == rhs
    }
}
impl PartialOrd for SmolStr {
    /// Always returns `Some`: the ordering is total and defined by `Ord`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for SmolStr {
    /// Orders values the same way `str` orders their contents; the storage
    /// representation does not influence the result.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let (lhs, rhs) = (self.as_str(), other.as_str());
        Ord::cmp(lhs, rhs)
    }
}
impl Hash for SmolStr {
    /// Feeds the hasher exactly what the underlying `str` would, so inline
    /// and heap values with equal contents hash equally and the hash agrees
    /// with the `Borrow<str>` view.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let text = self.as_str();
        Hash::hash(text, state)
    }
}
impl Serialize for SmolStr {
    /// Serializes as a plain string; the inline/heap distinction is not
    /// part of the serialized form.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
impl<'de> Deserialize<'de> for SmolStr {
    /// Deserializes an owned `String` first and then converts it with
    /// `SmolStr::from_string`, so short values end up inline.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        String::deserialize(deserializer).map(SmolStr::from_string)
    }
}
// Unit tests for `SmolStr`: representation choice around the inline capacity,
// clone sharing, serde round-trip, equality impls, concatenation helpers,
// `try_build_inline`, and `is_ascii`.
#[cfg(test)]
mod tests {
use super::*;
// The empty constructor yields an inline, zero-length string.
#[test]
fn empty_is_inline() {
let s = SmolStr::new_empty();
assert!(s.is_inline());
assert_eq!(s.len(), 0);
assert_eq!(s.as_str(), "");
}
// A short payload is stored inline and reads back unchanged.
#[test]
fn short_payload_stays_inline() {
let s = SmolStr::from_borrowed("hello");
assert!(s.is_inline());
assert_eq!(s.as_str(), "hello");
assert_eq!(s.len(), 5);
}
// A payload of exactly SMOL_STR_INLINE_CAP bytes still fits inline.
#[test]
fn cap_boundary_inline() {
let payload = "a".repeat(SMOL_STR_INLINE_CAP);
let s = SmolStr::from_borrowed(&payload);
assert!(s.is_inline());
assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
assert_eq!(s.as_str(), payload);
}
// One byte past the capacity switches to the heap representation.
#[test]
fn one_past_cap_goes_heap() {
let payload = "a".repeat(SMOL_STR_INLINE_CAP + 1);
let s = SmolStr::from_borrowed(&payload);
assert!(!s.is_inline());
assert_eq!(s.len(), SMOL_STR_INLINE_CAP + 1);
assert_eq!(s.as_str(), payload);
}
// Cloning an inline value produces another inline value with equal contents.
#[test]
fn clone_inline_does_not_alloc_heap() {
let s = SmolStr::from_borrowed("short");
let c = s.clone();
assert!(c.is_inline());
assert_eq!(s, c);
}
// Cloning a heap value shares the same Arc allocation instead of copying it.
#[test]
fn clone_heap_shares_arc() {
let s = SmolStr::from_borrowed(&"x".repeat(40));
let c = s.clone();
match (&s.repr, &c.repr) {
(SmolStrRepr::Heap(a), SmolStrRepr::Heap(b)) => {
assert!(
Arc::ptr_eq(a, b),
"Heap clone should share the same Arc allocation"
);
}
_ => panic!("expected both heap variants"),
}
}
// Serializes to a plain JSON string and deserializes back to an equal value.
#[test]
fn round_trip_serde() {
let s = SmolStr::from_borrowed("hello world");
let json = serde_json::to_string(&s).unwrap();
assert_eq!(json, "\"hello world\"");
let back: SmolStr = serde_json::from_str(&json).unwrap();
assert_eq!(back, s);
}
// Exercises the cross-type PartialEq impls against &str, str and String,
// including the reversed `String == SmolStr` direction.
#[test]
fn eq_against_str_and_string() {
let s = SmolStr::from_borrowed("k");
assert_eq!(s, "k");
assert_eq!(s, *"k");
assert_eq!(s, String::from("k"));
assert_eq!(String::from("k"), s);
}
// Pins the in-memory size of SmolStr to 24 bytes.
#[test]
fn size_is_24_bytes() {
assert_eq!(std::mem::size_of::<SmolStr>(), 24);
}
// Concatenating no slices yields the empty inline string.
#[test]
fn concat_many_empty_is_empty_inline() {
let s = SmolStr::concat_many(&[]);
assert!(s.is_inline());
assert_eq!(s.len(), 0);
assert_eq!(s.as_str(), "");
}
// A single short slice passes through unchanged and stays inline.
#[test]
fn concat_many_single_slice_matches_from_borrowed() {
let s = SmolStr::concat_many(&["hello"]);
assert!(s.is_inline());
assert_eq!(s.as_str(), "hello");
}
// Several slices totalling 20 bytes are joined in order on the inline path.
#[test]
fn concat_many_inline_path() {
let s = SmolStr::concat_many(&["aaaaa", "bbbbb", "ccccc", "ddddd"]);
assert!(s.is_inline());
assert_eq!(s.as_str(), "aaaaabbbbbcccccddddd");
assert_eq!(s.len(), 20);
}
// A combined length of exactly SMOL_STR_INLINE_CAP still stays inline.
#[test]
fn concat_many_at_cap_inline() {
let s = SmolStr::concat_many(&["a".repeat(11).as_str(), "b".repeat(11).as_str()]);
assert!(s.is_inline());
assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
}
// A combined length of 32 bytes exceeds the capacity and goes to the heap.
#[test]
fn concat_many_heap_path() {
let s = SmolStr::concat_many(&["aaaaaaaa", "bbbbbbbb", "cccccccc", "dddddddd"]);
assert!(!s.is_inline());
assert_eq!(s.as_str(), "aaaaaaaabbbbbbbbccccccccdddddddd");
assert_eq!(s.len(), 32);
}
// The writer closure fills the inline buffer; here it lowercases ASCII input.
#[test]
fn try_build_inline_fills_inline_slot() {
let src = b"HELLO";
let s = SmolStr::try_build_inline(src.len(), |out| {
for (i, b) in src.iter().enumerate() {
out[i] = b.to_ascii_lowercase();
}
})
.expect("inline path should accept 5-byte payload");
assert!(s.is_inline());
assert_eq!(s.as_str(), "hello");
}
// Requesting exactly SMOL_STR_INLINE_CAP bytes is accepted.
#[test]
fn try_build_inline_at_cap_inline() {
let s =
SmolStr::try_build_inline(SMOL_STR_INLINE_CAP, |out| out.fill(b'x')).expect("22 fits");
assert!(s.is_inline());
assert_eq!(s.len(), SMOL_STR_INLINE_CAP);
}
// Requesting more than the capacity returns None without invoking the writer.
#[test]
fn try_build_inline_overflow_returns_none() {
let s = SmolStr::try_build_inline(SMOL_STR_INLINE_CAP + 1, |_out| {
panic!("writer must not run when out_len exceeds cap");
});
assert!(s.is_none());
}
// Bytes that are not valid UTF-8 are rejected with None.
#[test]
fn try_build_inline_rejects_invalid_utf8() {
let s = SmolStr::try_build_inline(1, |out| out[0] = 0xff);
assert!(s.is_none());
}
// A zero-length request succeeds and yields the empty inline string.
#[test]
fn try_build_inline_zero_length_is_empty() {
let s = SmolStr::try_build_inline(0, |_out| { })
.expect("zero-length always inline");
assert!(s.is_inline());
assert_eq!(s.as_str(), "");
}
// The empty string counts as ASCII.
#[test]
fn is_ascii_inline_empty() {
let s = SmolStr::new_empty();
assert!(s.is_inline());
assert!(s.is_ascii());
}
// A short pure-ASCII inline string is reported as ASCII.
#[test]
fn is_ascii_inline_pure_ascii() {
let s = SmolStr::from_borrowed("hello");
assert!(s.is_inline());
assert!(s.is_ascii());
}
// An inline string containing a two-byte UTF-8 sequence is not ASCII.
#[test]
fn is_ascii_inline_with_high_byte() {
let raw = vec![b'c', b'a', b'f', 0xC3, 0xA9];
let payload = String::from_utf8(raw).expect("valid UTF-8");
let s = SmolStr::from_borrowed(&payload);
assert!(s.is_inline());
assert!(!s.is_ascii());
}
// is_ascii only inspects the used bytes, checked here at full inline capacity.
#[test]
fn is_ascii_inline_at_cap_boundary() {
let payload = "a".repeat(SMOL_STR_INLINE_CAP);
let s = SmolStr::from_borrowed(&payload);
assert!(s.is_inline());
assert!(s.is_ascii());
}
// A heap-backed pure-ASCII string is reported as ASCII.
#[test]
fn is_ascii_heap_pure_ascii() {
let payload = "x".repeat(SMOL_STR_INLINE_CAP + 8);
let s = SmolStr::from_borrowed(&payload);
assert!(!s.is_inline());
assert!(s.is_ascii());
}
// A heap-backed string ending in a non-ASCII character is not ASCII.
#[test]
fn is_ascii_heap_with_non_ascii() {
let mut payload = "x".repeat(SMOL_STR_INLINE_CAP).into_bytes();
payload.extend_from_slice(&[b'y', b'y', b'z', 0xC3, 0xA9]);
let payload = String::from_utf8(payload).expect("valid UTF-8");
let s = SmolStr::from_borrowed(&payload);
assert!(!s.is_inline());
assert!(!s.is_ascii());
}
// Folding with pairwise `concat` gives the same contents as one `concat_many`.
#[test]
fn concat_many_matches_nested_concat() {
let leaves = ["foo_", "bar_", "baz_", "qux_"];
let nested = {
let mut acc = SmolStr::new_empty();
for leaf in leaves.iter() {
acc = SmolStr::concat(acc.as_str(), leaf);
}
acc
};
let folded = SmolStr::concat_many(&leaves);
assert_eq!(nested.as_str(), folded.as_str());
}
}