Skip to main content

cold_string/
lib.rs

1#![allow(rustdoc::bare_urls)]
2#![doc = include_str!("../README.md")]
3#![cfg_attr(not(feature = "std"), no_std)]
4
5extern crate alloc;
6
7use alloc::{
8    alloc::{alloc, dealloc, Layout},
9    str::Utf8Error,
10    string::String,
11};
12use core::{
13    fmt,
14    hash::{Hash, Hasher},
15    mem,
16    ops::Deref,
17    ptr::{self, with_exposed_provenance_mut},
18    slice, str,
19};
20
21mod vint;
22use crate::vint::VarInt;
23
/// Alignment requested for every heap allocation.
///
/// An alignment of 2 keeps the lowest bit of a heap address clear, which lets that bit
/// serve as the inline/heap tag tested by `ColdString::is_inline`.
const HEAP_ALIGN: usize = 2;
/// Size in bytes of a `ColdString` (one machine word).
///
/// Strings strictly shorter than this are stored inline; the first byte is the tag/length.
const WIDTH: usize = mem::size_of::<usize>();
26
/// Compact representation of immutable UTF-8 strings. Optimized for memory usage and struct packing.
///
/// The value is a single pointer-sized byte array with an alignment of 1. Strings shorter
/// than `size_of::<usize>()` bytes are stored directly inside that array; longer strings
/// live in a heap allocation whose address is stored in the array instead.
///
/// # Example
/// ```
/// let s = cold_string::ColdString::new("qwerty");
/// assert_eq!(s.as_str(), "qwerty");
/// ```
/// ```
/// use std::mem;
/// use cold_string::ColdString;
///
/// // One machine word (8 bytes on 64-bit targets) and no alignment padding.
/// assert_eq!(mem::size_of::<ColdString>(), mem::size_of::<usize>());
/// assert_eq!(mem::align_of::<ColdString>(), 1);
/// assert_eq!(mem::size_of::<(ColdString, u8)>(), mem::size_of::<usize>() + 1);
/// assert_eq!(mem::align_of::<(ColdString, u8)>(), 1);
/// ```
#[repr(transparent)]
pub struct ColdString([u8; WIDTH]);
45
46impl ColdString {
47    /// Convert a slice of bytes into a [`ColdString`].
48    ///
49    /// A [`ColdString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
50    /// This method converts from an arbitrary contiguous collection of bytes into a
51    /// [`ColdString`], failing if the provided bytes are not `UTF-8`.
52    ///
53    /// # Examples
54    /// ### Valid UTF-8
55    /// ```
56    /// # use cold_string::ColdString;
57    /// let bytes = [240, 159, 166, 128, 240, 159, 146, 175];
58    /// let compact = ColdString::from_utf8(&bytes).expect("valid UTF-8");
59    ///
60    /// assert_eq!(compact, "🦀💯");
61    /// ```
62    ///
63    /// ### Invalid UTF-8
64    /// ```
65    /// # use cold_string::ColdString;
66    /// let bytes = [255, 255, 255];
67    /// let result = ColdString::from_utf8(&bytes);
68    ///
69    /// assert!(result.is_err());
70    /// ```
71    pub fn from_utf8(v: &[u8]) -> Result<Self, Utf8Error> {
72        Ok(Self::new(str::from_utf8(v)?))
73    }
74
75    /// Converts a vector of bytes to a [`ColdString`] without checking that the string contains
76    /// valid UTF-8.
77    ///
78    /// See the safe version, [`ColdString::from_utf8`], for more details.
79    ///
80    /// # Examples
81    ///
82    /// Basic usage:
83    ///
84    /// ```
85    /// # use cold_string::ColdString;
86    /// // some bytes, in a vector
87    /// let sparkle_heart = [240, 159, 146, 150];
88    ///
89    /// let sparkle_heart = unsafe {
90    ///     ColdString::from_utf8_unchecked(&sparkle_heart)
91    /// };
92    ///
93    /// assert_eq!("💖", sparkle_heart);
94    /// ```
95    pub unsafe fn from_utf8_unchecked(v: &[u8]) -> Self {
96        Self::new(str::from_utf8_unchecked(v))
97    }
98
99    /// Creates a new [`ColdString`] from any type that implements `AsRef<str>`.
100    /// If the string is short enough, then it will be inlined on the stack.
101    pub fn new<T: AsRef<str>>(x: T) -> Self {
102        let s = x.as_ref();
103        if s.len() < WIDTH {
104            Self::new_inline(s)
105        } else {
106            Self::new_heap(s)
107        }
108    }
109
110    #[inline]
111    const fn is_inline(&self) -> bool {
112        self.0[0] & 1 == 1
113    }
114
115    #[inline]
116    const fn new_inline(s: &str) -> Self {
117        debug_assert!(s.len() < WIDTH);
118        let mut buf = [0u8; WIDTH];
119        unsafe {
120            let dest_ptr = buf.as_mut_ptr().add(1);
121            ptr::copy_nonoverlapping(s.as_ptr(), dest_ptr, s.len());
122        }
123        buf[0] = ((s.len() as u8) << 1) | 1;
124        Self(buf)
125    }
126
127    #[inline]
128    fn new_heap(s: &str) -> Self {
129        let len = s.len();
130        let mut len_buf = [0u8; 10];
131        let vint_len = VarInt::write(len as u64, &mut len_buf);
132        let total = vint_len + len;
133        let layout = Layout::from_size_align(total, HEAP_ALIGN).unwrap();
134
135        unsafe {
136            let ptr = alloc(layout);
137            if ptr.is_null() {
138                alloc::alloc::handle_alloc_error(layout);
139            }
140
141            // TODO: can optimize this
142            ptr::copy_nonoverlapping(len_buf.as_ptr(), ptr, vint_len);
143            ptr::copy_nonoverlapping(s.as_ptr(), ptr.add(vint_len), len);
144
145            let addr = ptr.expose_provenance();
146            debug_assert!(addr % 2 == 0);
147            Self(addr.to_le_bytes())
148        }
149    }
150
151    #[inline]
152    fn heap_ptr(&self) -> *mut u8 {
153        // Can be const in 1.91
154        debug_assert!(!self.is_inline());
155        let addr = usize::from_le_bytes(self.0);
156        debug_assert!(addr % 2 == 0);
157        with_exposed_provenance_mut::<u8>(addr)
158    }
159
160    #[inline]
161    const fn inline_len(&self) -> usize {
162        self.0[0] as usize >> 1
163    }
164
165    /// Returns the length of this `ColdString`, in bytes, not [`char`]s or
166    /// graphemes. In other words, it might not be what a human considers the
167    /// length of the string.
168    ///
169    /// # Examples
170    ///
171    /// ```
172    /// use cold_string::ColdString;
173    ///
174    /// let a = ColdString::from("foo");
175    /// assert_eq!(a.len(), 3);
176    ///
177    /// let fancy_f = String::from("Æ’oo");
178    /// assert_eq!(fancy_f.len(), 4);
179    /// assert_eq!(fancy_f.chars().count(), 3);
180    /// ```
181    #[inline]
182    pub fn len(&self) -> usize {
183        if self.is_inline() {
184            self.inline_len()
185        } else {
186            unsafe {
187                let ptr = self.heap_ptr();
188                let (len, _) = VarInt::read(ptr);
189                len as usize
190            }
191        }
192    }
193
194    #[allow(unsafe_op_in_unsafe_fn)]
195    #[inline]
196    unsafe fn decode_inline(&self) -> &[u8] {
197        let len = self.inline_len();
198        let ptr = self.0.as_ptr().add(1);
199        slice::from_raw_parts(ptr, len)
200    }
201
202    #[allow(unsafe_op_in_unsafe_fn)]
203    #[inline]
204    unsafe fn decode_heap(&self) -> &[u8] {
205        let ptr = self.heap_ptr();
206        let (len, header) = VarInt::read(ptr);
207        let data = ptr.add(header);
208        slice::from_raw_parts(data, len as usize)
209    }
210
211    /// Returns a byte slice of this `ColdString`'s contents.
212    ///
213    /// The inverse of this method is [`from_utf8`].
214    ///
215    /// [`from_utf8`]: String::from_utf8
216    ///
217    /// # Examples
218    ///
219    /// ```
220    /// let s = cold_string::ColdString::from("hello");
221    ///
222    /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
223    /// ```
224    #[inline]
225    pub fn as_bytes(&self) -> &[u8] {
226        match self.is_inline() {
227            true => unsafe { self.decode_inline() },
228            false => unsafe { self.decode_heap() },
229        }
230    }
231
232    /// Returns a string slice containing the entire [`ColdString`].
233    ///
234    /// # Examples
235    /// ```
236    /// let s = cold_string::ColdString::new("hello");
237    ///
238    /// assert_eq!(s.as_str(), "hello");
239    /// ```
240    #[inline]
241    pub fn as_str(&self) -> &str {
242        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
243    }
244}
245
246impl Deref for ColdString {
247    type Target = str;
248    fn deref(&self) -> &str {
249        self.as_str()
250    }
251}
252
253impl Drop for ColdString {
254    fn drop(&mut self) {
255        if !self.is_inline() {
256            unsafe {
257                let ptr = self.heap_ptr();
258                let (len, header) = VarInt::read(ptr);
259                let total = header + len as usize;
260                let layout = Layout::from_size_align(total, HEAP_ALIGN).unwrap();
261                dealloc(ptr, layout);
262            }
263        }
264    }
265}
266
267impl Clone for ColdString {
268    fn clone(&self) -> Self {
269        match self.is_inline() {
270            true => Self(self.0),
271            false => Self::new_heap(self.as_str()),
272        }
273    }
274}
275
276impl PartialEq for ColdString {
277    fn eq(&self, other: &Self) -> bool {
278        match (self.is_inline(), other.is_inline()) {
279            (true, true) => self.0 == other.0,
280            (false, false) => unsafe { self.decode_heap() == other.decode_heap() },
281            _ => false,
282        }
283    }
284}
285
286impl Eq for ColdString {}
287
288impl Hash for ColdString {
289    fn hash<H: Hasher>(&self, state: &mut H) {
290        self.as_str().hash(state)
291    }
292}
293
294impl fmt::Debug for ColdString {
295    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
296        self.as_str().fmt(f)
297    }
298}
299
300impl fmt::Display for ColdString {
301    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
302        self.as_str().fmt(f)
303    }
304}
305
306impl From<&str> for ColdString {
307    fn from(s: &str) -> Self {
308        Self::new(s)
309    }
310}
311
312impl From<String> for ColdString {
313    fn from(s: String) -> Self {
314        Self::new(&s)
315    }
316}
317
318impl FromIterator<char> for ColdString {
319    fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> Self {
320        let s: String = iter.into_iter().collect();
321        ColdString::new(&s)
322    }
323}
324
// SAFETY: in this file a `ColdString` is either plain inline bytes or the address of a heap
// allocation that is created in `new_heap`, duplicated (never shared) in `Clone`, freed in
// `Drop`, and only ever read through `&self`. Nothing here mutates the contents after
// construction, so moving the value to another thread is sound.
unsafe impl Send for ColdString {}
// SAFETY: the methods in this file expose only shared, read-only access to the contents,
// so concurrent use through `&ColdString` cannot race.
unsafe impl Sync for ColdString {}
327
328impl core::borrow::Borrow<str> for ColdString {
329    fn borrow(&self) -> &str {
330        self.as_str()
331    }
332}
333
334impl PartialEq<str> for ColdString {
335    fn eq(&self, other: &str) -> bool {
336        if self.is_inline() {
337            unsafe { self.decode_inline() == other.as_bytes() }
338        } else {
339            unsafe { self.decode_heap() == other.as_bytes() }
340        }
341    }
342}
343
344impl PartialEq<ColdString> for str {
345    fn eq(&self, other: &ColdString) -> bool {
346        other.eq(self)
347    }
348}
349
350impl PartialEq<&str> for ColdString {
351    fn eq(&self, other: &&str) -> bool {
352        self.eq(*other)
353    }
354}
355
356impl PartialEq<ColdString> for &str {
357    fn eq(&self, other: &ColdString) -> bool {
358        other.eq(*self)
359    }
360}
361
362impl AsRef<str> for ColdString {
363    #[inline]
364    fn as_ref(&self) -> &str {
365        self.as_str()
366    }
367}
368
369impl AsRef<[u8]> for ColdString {
370    #[inline]
371    fn as_ref(&self) -> &[u8] {
372        self.as_bytes()
373    }
374}
375
376impl alloc::str::FromStr for ColdString {
377    type Err = core::convert::Infallible;
378    fn from_str(s: &str) -> Result<ColdString, Self::Err> {
379        Ok(ColdString::new(s))
380    }
381}
382
/// Serializes as a plain string.
#[cfg(feature = "serde")]
impl serde::Serialize for ColdString {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
389
/// Deserializes through an intermediate `String`, then copies it into a `ColdString`.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ColdString {
    fn deserialize<D>(d: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let owned = <String as serde::Deserialize<'de>>::deserialize(d)?;
        Ok(Self::new(owned.as_str()))
    }
}
397
#[cfg(all(test, feature = "serde"))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// Checks that `text` serializes to, and deserializes from, a single string token.
    fn assert_string_token(text: &'static str) {
        let value = ColdString::new(text);
        assert_tokens(&value, &[Token::Str(text)]);
    }

    #[test]
    fn test_serde_cold_string_inline() {
        assert_string_token("ferris");
    }

    #[test]
    fn test_serde_cold_string_heap() {
        assert_string_token("This is a significantly longer string for heap testing");
    }
}
416
#[cfg(test)]
mod tests {
    use super::*;

    /// The type must stay one machine word wide with alignment 1, so it packs tightly.
    #[test]
    fn test_layout() {
        // Expressed via `WIDTH` rather than a literal 8 so the test also holds on
        // targets where `usize` is not 8 bytes.
        assert_eq!(mem::size_of::<ColdString>(), WIDTH);
        assert_eq!(mem::align_of::<ColdString>(), 1);
        struct Foo {
            _s: ColdString,
            _b: u8,
        }

        assert_eq!(mem::size_of::<Foo>(), WIDTH + 1);
        assert_eq!(mem::align_of::<Foo>(), 1);
    }

    /// Round-trips strings on both sides of the inline/heap threshold.
    #[test]
    fn it_works() {
        for s in ["test", "", "1234567", "12345678", "🦀💯", "longer test"] {
            let cs = ColdString::new(s);
            assert_eq!(cs.as_str(), s);
            assert_eq!(cs.len(), s.len());
            // Strings strictly shorter than `WIDTH` bytes are the ones stored inline.
            assert_eq!(cs.len() < WIDTH, cs.is_inline());
            assert_eq!(cs.clone(), cs);
            #[cfg(feature = "std")]
            {
                use std::hash::{BuildHasher, RandomState};
                let bh = RandomState::new();
                assert_eq!(bh.hash_one(&cs), bh.hash_one(&cs.clone()));
            }
            assert_eq!(cs, s);
            assert_eq!(s, cs);
            assert_eq!(cs, *s);
            assert_eq!(*s, cs);
        }
    }
}