1#![allow(rustdoc::bare_urls)]
2#![doc = include_str!("../README.md")]
3#![allow(unstable_name_collisions)]
4#![no_std]
5
6extern crate alloc;
7
8#[rustversion::before(1.84)]
9use sptr::Strict;
10
11use alloc::{
12 alloc::{alloc, dealloc, Layout},
13 borrow::{Cow, ToOwned},
14 boxed::Box,
15 str::Utf8Error,
16 string::String,
17};
18use core::{
19 cmp::Ordering,
20 fmt,
21 hash::{Hash, Hasher},
22 iter::FromIterator,
23 mem,
24 ops::Deref,
25 ptr, slice, str,
26};
27
28mod vint;
29use crate::vint::VarInt;
30
31#[cfg(feature = "rkyv")]
32mod rkyv;
33
/// Alignment requested for every heap allocation. A 4-byte alignment keeps the two lowest
/// address bits zero, which is what allows `new_heap` to rotate them into the tag position
/// and `heap_ptr` to undo the rotation without losing address bits.
const HEAP_ALIGN: usize = 4;
/// Size of a `usize` in bytes; also the maximum length of an inline string.
const WIDTH: usize = mem::size_of::<usize>();
36
/// A UTF-8 string stored in a single pointer-sized word with an alignment of one byte.
///
/// Strings of at most `size_of::<usize>()` bytes are kept inline in the word itself;
/// longer strings live in a heap allocation whose address is encoded in the word.
#[repr(packed)]
pub struct ColdString {
    /// Either the inline string bytes or a rotated, tagged heap address. The first byte
    /// in memory tells the cases apart:
    /// - `10xxxxxx`: heap pointer (a byte that can never start UTF-8 text),
    /// - `11111xxx`: inline string shorter than `WIDTH`, `xxx` being its length,
    /// - anything else: inline string of exactly `WIDTH` bytes; the byte is its first byte.
    encoded: *const u8,
}
62
63impl ColdString {
64 const TAG_MASK: usize = usize::from_ne_bytes(0b11000000usize.to_le_bytes());
65 const INLINE_TAG: usize = usize::from_ne_bytes(0b11111000usize.to_le_bytes());
66 const PTR_TAG: usize = usize::from_ne_bytes(0b10000000usize.to_le_bytes());
67 const LEN_MASK: usize = usize::from_ne_bytes(0b111usize.to_le_bytes());
68 const ROT: u32 = if cfg!(target_endian = "little") {
69 0
70 } else {
71 8 * (WIDTH - 1) as u32
72 };
73
74 pub fn from_utf8<B: AsRef<[u8]>>(v: B) -> Result<Self, Utf8Error> {
99 Ok(Self::new(str::from_utf8(v.as_ref())?))
100 }
101
102 pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(v: B) -> Self {
123 Self::new(str::from_utf8_unchecked(v.as_ref()))
124 }
125
126 pub fn new<T: AsRef<str>>(x: T) -> Self {
130 let s = x.as_ref();
131 if s.len() <= WIDTH {
132 Self::new_inline(s)
133 } else {
134 Self::new_heap(s)
135 }
136 }
137
138 #[inline]
139 const fn inline_buf(s: &str) -> [u8; WIDTH] {
140 debug_assert!(s.len() <= WIDTH);
141 let mut buf = [0u8; WIDTH];
142 if s.len() < WIDTH {
143 let tag =
144 (Self::INLINE_TAG | s.len().rotate_left(Self::ROT)).rotate_right(Self::ROT) as u8;
145 buf[0] = tag;
146 }
147 buf
148 }
149
150 #[rustversion::attr(since(1.61), const)]
151 #[inline]
152 fn from_inline_buf(b: [u8; WIDTH]) -> Self {
153 let encoded = ptr::null_mut::<u8>().wrapping_add(usize::from_ne_bytes(b));
154 Self { encoded }
155 }
156
157 #[inline]
158 const fn utf8_start(l: usize) -> usize {
159 (l < WIDTH) as usize
160 }
161
162 #[inline]
163 fn new_inline(s: &str) -> Self {
164 let mut buf = Self::inline_buf(s);
165 let start = Self::utf8_start(s.len());
166 buf[start..s.len() + start].copy_from_slice(s.as_bytes());
167 Self::from_inline_buf(buf)
168 }
169
170 #[rustversion::since(1.61)]
186 #[inline]
187 pub const fn new_inline_const(s: &str) -> Self {
188 if s.len() > WIDTH {
189 panic!(
190 "Length for `new_inline_const` must be less than `core::mem::size_of::<usize>()`."
191 );
192 }
193 let mut buf = Self::inline_buf(s);
194 let start = Self::utf8_start(s.len());
195 let mut i = 0;
196 while i < s.len() {
197 buf[i + start] = s.as_bytes()[i];
198 i += 1;
199 }
200 Self::from_inline_buf(buf)
201 }
202
203 #[rustversion::attr(since(1.71), const)]
204 #[inline]
205 unsafe fn ptr(&self) -> *const u8 {
206 ptr::read_unaligned(ptr::addr_of!(self.encoded))
207 }
208
209 #[inline]
210 fn addr(&self) -> usize {
211 unsafe { self.ptr().addr() }
212 }
213
214 #[inline]
215 fn tag(&self) -> usize {
216 self.addr() & Self::TAG_MASK
217 }
218
219 #[inline]
221 pub fn is_inline(&self) -> bool {
222 self.tag() != Self::PTR_TAG
223 }
224
225 #[inline]
226 fn new_heap(s: &str) -> Self {
227 let len = s.len();
228 let (vint_len, len_buf) = VarInt::write(len as u64);
229 let total = vint_len + len;
230 let layout = Layout::from_size_align(total, HEAP_ALIGN).unwrap();
231
232 unsafe {
233 let ptr = alloc(layout);
234 if ptr.is_null() {
235 alloc::alloc::handle_alloc_error(layout);
236 }
237
238 ptr::copy_nonoverlapping(len_buf.as_ptr(), ptr, vint_len);
240 ptr::copy_nonoverlapping(s.as_ptr(), ptr.add(vint_len), len);
241 let encoded = ptr.map_addr(|addr| {
242 debug_assert!(addr % HEAP_ALIGN == 0);
243 let mut addr = addr.rotate_left(6 + Self::ROT);
244 addr |= Self::PTR_TAG;
245 addr
246 });
247 Self { encoded }
248 }
249 }
250
251 #[inline]
252 fn heap_ptr(&self) -> *const u8 {
253 debug_assert!(!self.is_inline());
254 unsafe {
255 self.ptr().map_addr(|mut addr| {
256 addr ^= Self::PTR_TAG;
257 let addr = addr.rotate_right(6 + Self::ROT);
258 debug_assert!(addr % HEAP_ALIGN == 0);
259 addr
260 })
261 }
262 }
263
264 #[inline]
265 fn inline_len(&self) -> usize {
266 let addr = self.addr();
267 match addr & Self::INLINE_TAG {
268 Self::INLINE_TAG => (addr & Self::LEN_MASK).rotate_right(Self::ROT),
269 _ => WIDTH,
270 }
271 }
272
273 #[inline]
290 pub fn len(&self) -> usize {
291 if self.is_inline() {
292 self.inline_len()
293 } else {
294 unsafe {
295 let ptr = self.heap_ptr();
296 let (len, _) = VarInt::read(ptr);
297 len as usize
298 }
299 }
300 }
301
302 #[allow(unsafe_op_in_unsafe_fn)]
303 #[inline]
304 unsafe fn decode_inline(&self) -> &[u8] {
305 let len = self.inline_len();
306 let self_bytes_ptr = ptr::addr_of!(self.encoded) as *const u8;
308 let start = Self::utf8_start(len);
309 slice::from_raw_parts(self_bytes_ptr.add(start), len)
310 }
311
312 #[allow(unsafe_op_in_unsafe_fn)]
313 #[inline]
314 unsafe fn decode_heap(&self) -> &[u8] {
315 let ptr = self.heap_ptr();
316 let (len, header) = VarInt::read(ptr);
317 let data = ptr.add(header);
318 slice::from_raw_parts(data, len)
319 }
320
321 #[inline]
335 pub fn as_bytes(&self) -> &[u8] {
336 match self.is_inline() {
337 true => unsafe { self.decode_inline() },
338 false => unsafe { self.decode_heap() },
339 }
340 }
341
342 #[inline]
351 pub fn as_str(&self) -> &str {
352 unsafe { str::from_utf8_unchecked(self.as_bytes()) }
353 }
354
355 #[inline]
364 pub fn is_empty(&self) -> bool {
365 self.len() == 0
366 }
367}
368
369impl Default for ColdString {
370 fn default() -> Self {
371 Self::new_inline("")
372 }
373}
374
375impl Deref for ColdString {
376 type Target = str;
377 fn deref(&self) -> &str {
378 self.as_str()
379 }
380}
381
382impl Drop for ColdString {
383 fn drop(&mut self) {
384 if !self.is_inline() {
385 unsafe {
386 let ptr = self.heap_ptr();
387 let (len, header) = VarInt::read(ptr);
388 let total = header + len;
389 let layout = Layout::from_size_align(total, HEAP_ALIGN).unwrap();
390 dealloc(ptr as *mut u8, layout);
391 }
392 }
393 }
394}
395
396impl Clone for ColdString {
397 fn clone(&self) -> Self {
398 match self.is_inline() {
399 true => unsafe {
400 Self {
401 encoded: self.ptr(),
402 }
403 },
404 false => Self::new_heap(self.as_str()),
405 }
406 }
407}
408
409impl PartialEq for ColdString {
410 fn eq(&self, other: &Self) -> bool {
411 match (self.is_inline(), other.is_inline()) {
412 (true, true) => unsafe { self.ptr() == other.ptr() },
413 (false, false) => unsafe { self.decode_heap() == other.decode_heap() },
414 _ => false,
415 }
416 }
417}
418
419impl Eq for ColdString {}
420
421impl Hash for ColdString {
422 fn hash<H: Hasher>(&self, state: &mut H) {
423 self.as_str().hash(state)
424 }
425}
426
427impl fmt::Debug for ColdString {
428 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
429 fmt::Debug::fmt(self.as_str(), f)
430 }
431}
432
433impl fmt::Display for ColdString {
434 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
435 fmt::Display::fmt(self.as_str(), f)
436 }
437}
438
439impl From<&str> for ColdString {
440 fn from(s: &str) -> Self {
441 Self::new(s)
442 }
443}
444
445impl From<String> for ColdString {
446 fn from(s: String) -> Self {
447 Self::new(&s)
448 }
449}
450
451impl From<ColdString> for String {
452 fn from(s: ColdString) -> Self {
453 s.as_str().to_owned()
454 }
455}
456
457impl From<ColdString> for Cow<'_, str> {
458 #[inline]
459 fn from(s: ColdString) -> Self {
460 Self::Owned(s.into())
461 }
462}
463
464impl<'a> From<&'a ColdString> for Cow<'a, str> {
465 #[inline]
466 fn from(s: &'a ColdString) -> Self {
467 Self::Borrowed(s)
468 }
469}
470
471impl<'a> From<Cow<'a, str>> for ColdString {
472 fn from(cow: Cow<'a, str>) -> Self {
473 match cow {
474 Cow::Borrowed(s) => s.into(),
475 Cow::Owned(s) => s.into(),
476 }
477 }
478}
479
480impl From<Box<str>> for ColdString {
481 #[inline]
482 #[track_caller]
483 fn from(b: Box<str>) -> Self {
484 Self::new(&b)
485 }
486}
487
488impl FromIterator<char> for ColdString {
489 fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> Self {
490 let s: String = iter.into_iter().collect();
491 ColdString::new(&s)
492 }
493}
494
// SAFETY: a `ColdString` is either a plain inline value or the owner of its own heap
// allocation (`Clone` copies the bytes into a fresh allocation, `Drop` frees it), and no
// method in this file mutates through `&self`. The raw pointer field is therefore the
// only thing that prevents the auto traits.
unsafe impl Send for ColdString {}
unsafe impl Sync for ColdString {}
497
498impl core::borrow::Borrow<str> for ColdString {
499 fn borrow(&self) -> &str {
500 self.as_str()
501 }
502}
503
504impl PartialEq<str> for ColdString {
505 fn eq(&self, other: &str) -> bool {
506 if self.is_inline() {
507 unsafe { self.decode_inline() == other.as_bytes() }
508 } else {
509 unsafe { self.decode_heap() == other.as_bytes() }
510 }
511 }
512}
513
514impl PartialEq<ColdString> for str {
515 fn eq(&self, other: &ColdString) -> bool {
516 other.eq(self)
517 }
518}
519
520impl PartialEq<&str> for ColdString {
521 fn eq(&self, other: &&str) -> bool {
522 self.eq(*other)
523 }
524}
525
526impl PartialEq<ColdString> for &str {
527 fn eq(&self, other: &ColdString) -> bool {
528 other.eq(*self)
529 }
530}
531
532impl AsRef<str> for ColdString {
533 #[inline]
534 fn as_ref(&self) -> &str {
535 self.as_str()
536 }
537}
538
539impl AsRef<[u8]> for ColdString {
540 #[inline]
541 fn as_ref(&self) -> &[u8] {
542 self.as_bytes()
543 }
544}
545
546impl Ord for ColdString {
547 fn cmp(&self, other: &Self) -> Ordering {
548 self.as_str().cmp(other.as_str())
549 }
550}
551
552impl PartialOrd for ColdString {
553 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
554 self.as_str().partial_cmp(other.as_str())
555 }
556}
557
558impl alloc::str::FromStr for ColdString {
559 type Err = core::convert::Infallible;
560 fn from_str(s: &str) -> Result<ColdString, Self::Err> {
561 Ok(ColdString::new(s))
562 }
563}
564
#[cfg(feature = "serde")]
impl serde::Serialize for ColdString {
    /// Serializes the contents as a plain string.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serde::Serializer::serialize_str(serializer, text)
    }
}
571
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for ColdString {
    /// Deserializes an owned `String` and copies it into a `ColdString`.
    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
        <String as serde::Deserialize<'de>>::deserialize(d)
            .map(|owned| ColdString::new(owned.as_str()))
    }
}
579
#[cfg(all(test, feature = "serde"))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// A string short enough for inline storage round-trips as a single `Str` token.
    #[test]
    fn test_serde_cold_string_inline() {
        let text = "ferris";
        let value = ColdString::new(text);
        assert_tokens(&value, &[Token::Str(text)]);
    }

    /// A string that needs a heap allocation round-trips as a single `Str` token.
    #[test]
    fn test_serde_cold_string_heap() {
        let text = "This is a significantly longer string for heap testing";
        let value = ColdString::new(text);
        assert_tokens(&value, &[Token::Str(text)]);
    }
}
598
#[cfg(test)]
mod tests {
    use super::*;
    use core::hash::BuildHasher;
    use hashbrown::hash_map::DefaultHashBuilder;

    /// `ColdString` is exactly one `usize` wide and byte-aligned, so embedding it in a
    /// struct adds no padding.
    #[test]
    fn test_layout() {
        assert_eq!(mem::size_of::<ColdString>(), mem::size_of::<usize>());
        assert_eq!(mem::align_of::<ColdString>(), 1);
        struct Foo {
            _s: ColdString,
            _b: u8,
        }

        assert_eq!(mem::size_of::<Foo>(), mem::size_of::<usize>() + 1);
        assert_eq!(mem::align_of::<Foo>(), 1);
    }

    /// The default value is the empty string.
    #[test]
    fn test_default() {
        assert!(ColdString::default().is_empty());
        assert_eq!(ColdString::default().len(), 0);
        assert_eq!(ColdString::default(), "");
        assert_eq!(ColdString::default(), ColdString::new(""));
    }

    /// Checks every observable property of `ColdString::new(s)` against `s`:
    /// representation choice, length, bytes, clone equality, hash stability across
    /// clones, and equality with `&str`/`str` in both operand orders.
    fn assert_correct(s: &str) {
        let cs = ColdString::new(s);
        assert_eq!(s.len() <= mem::size_of::<usize>(), cs.is_inline());
        assert_eq!(cs.len(), s.len());
        assert_eq!(cs.as_bytes(), s.as_bytes());
        assert_eq!(cs.as_str(), s);
        assert_eq!(cs.clone(), cs);
        let bh = DefaultHashBuilder::new();
        let mut hasher1 = bh.build_hasher();
        cs.hash(&mut hasher1);
        let mut hasher2 = bh.build_hasher();
        cs.clone().hash(&mut hasher2);
        assert_eq!(hasher1.finish(), hasher2.finish());
        assert_eq!(cs, s);
        assert_eq!(s, cs);
        assert_eq!(cs, *s);
        assert_eq!(*s, cs);
    }

    /// Runs `assert_correct` over a mix of ASCII, multi-byte and NUL-filled strings of
    /// various lengths.
    #[test]
    fn it_works() {
        for s in [
            "1",
            "12",
            "123",
            "1234",
            "12345",
            "123456",
            "1234567",
            "12345678",
            "123456789",
            str::from_utf8(&[240, 159, 146, 150]).unwrap(),
            "✅",
            "❤️",
            "🦀💯",
            "🦀",
            "💯",
            "abcd",
            "test",
            "",
            "\0",
            "\0\0",
            "\0\0\0",
            "\0\0\0\0",
            "\0\0\0\0\0\0\0",
            "\0\0\0\0\0\0\0\0",
            "1234567",
            "12345678",
            "longer test",
            str::from_utf8(&[103, 39, 240, 145, 167, 156, 194, 165]).unwrap(),
            "AaAa0 ® ",
            str::from_utf8(&[240, 158, 186, 128, 240, 145, 143, 151]).unwrap(),
        ] {
            assert_correct(s);
        }
    }

    /// Returns a `char` whose UTF-8 encoding starts with byte `b`, or `None` when `b`
    /// is not matched by any arm below (i.e. it is not used here as a leading byte).
    /// The continuation bytes are fixed values that make each sequence valid.
    fn char_from_leading_byte(b: u8) -> Option<char> {
        match b {
            0x00..=0x7F => Some(b as char),
            0xC2..=0xDF => str::from_utf8(&[b, 0x91]).unwrap().chars().next(),
            0xE0 => str::from_utf8(&[b, 0xA0, 0x91]).unwrap().chars().next(),
            0xE1..=0xEC | 0xEE..=0xEF => str::from_utf8(&[b, 0x91, 0xA5]).unwrap().chars().next(),
            0xED => str::from_utf8(&[b, 0x80, 0x91]).unwrap().chars().next(),
            0xF0 => str::from_utf8(&[b, 0x90, 0x91, 0xA5])
                .unwrap()
                .chars()
                .next(),
            0xF1..=0xF3 => str::from_utf8(&[b, 0x91, 0xA5, 0x82])
                .unwrap()
                .chars()
                .next(),
            0xF4 => str::from_utf8(&[b, 0x80, 0x91, 0x82])
                .unwrap()
                .chars()
                .next(),
            _ => None,
        }
    }

    /// Exercises the lengths around the inline/heap boundary (`width - 1`, `width`,
    /// `width + 1`) with every possible leading byte, since the first byte of the value
    /// doubles as the representation tag.
    #[test]
    fn test_edges() {
        let width = mem::size_of::<usize>();
        for len in [width - 1, width, width + 1] {
            for first_byte in 0u8..=255 {
                let first_char = match char_from_leading_byte(first_byte) {
                    Some(c) => c,
                    None => continue,
                };

                let mut s = String::with_capacity(len);
                s.push(first_char);

                // Pad with single-byte ASCII digits until the target length is reached.
                while s.len() < len {
                    let c = core::char::from_digit((len - s.len()) as u32, 10).unwrap();
                    s.push(c);
                }

                assert_correct(&s);
            }
        }
    }

    /// Writes values at every byte offset within a buffer and reads them back, checking
    /// that the type works when it is not aligned to a word boundary.
    #[test]
    fn test_unaligned_placement() {
        for s_content in ["torture", "tor", "tortures", "tort", "torture torture"] {
            let mut buffer = [0u8; 32];
            for offset in 0..8 {
                unsafe {
                    let dst = buffer.as_mut_ptr().add(offset) as *mut ColdString;
                    let s = ColdString::new(s_content);
                    // `s` is moved into the buffer; only `recovered` is dropped below.
                    ptr::write_unaligned(dst, s);
                    let recovered = ptr::read_unaligned(dst);
                    assert_eq!(recovered.as_str(), s_content);
                }
            }
        }
    }
}