smol_str/
lib.rs

1#![cfg_attr(not(feature = "std"), no_std)]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![debugger_visualizer(gdb_script_file = "gdb_smolstr_printer.py")]
4
5extern crate alloc;
6
7use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc};
8use core::{
9    borrow::Borrow,
10    cmp::{self, Ordering},
11    convert::Infallible,
12    fmt, hash, iter, mem, ops,
13    str::FromStr,
14};
15
16/// A `SmolStr` is a string type that has the following properties:
17///
18/// * `size_of::<SmolStr>() == 24` (therefor `== size_of::<String>()` on 64 bit platforms)
19/// * `Clone` is `O(1)`
20/// * Strings are stack-allocated if they are:
21///     * Up to 23 bytes long
22///     * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist
23///       solely of consecutive newlines, followed by consecutive spaces
24/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated
25/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation
26///
27/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for
28/// `SmolStr` is a good enough default storage for tokens of typical programming
29/// languages. Strings consisting of a series of newlines, followed by a series of
30/// whitespace are a typical pattern in computer programs because of indentation.
31/// Note that a specialized interner might be a better solution for some use cases.
32///
33/// `WS`: A string of 32 newlines followed by 128 spaces.
34pub struct SmolStr(Repr);
35
36impl SmolStr {
37    /// Constructs an inline variant of `SmolStr`.
38    ///
39    /// This never allocates.
40    ///
41    /// # Panics
42    ///
43    /// Panics if `text.len() > 23`.
44    #[inline]
45    pub const fn new_inline(text: &str) -> SmolStr {
46        assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop
47
48        let text = text.as_bytes();
49        let mut buf = [0; INLINE_CAP];
50        let mut i = 0;
51        while i < text.len() {
52            buf[i] = text[i];
53            i += 1
54        }
55        SmolStr(Repr::Inline {
56            // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
57            // as we asserted it.
58            len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) },
59            buf,
60        })
61    }
62
63    /// Constructs a `SmolStr` from a statically allocated string.
64    ///
65    /// This never allocates.
66    #[inline(always)]
67    pub const fn new_static(text: &'static str) -> SmolStr {
68        // NOTE: this never uses the inline storage; if a canonical
69        // representation is needed, we could check for `len() < INLINE_CAP`
70        // and call `new_inline`, but this would mean an extra branch.
71        SmolStr(Repr::Static(text))
72    }
73
74    /// Constructs a `SmolStr` from a `str`, heap-allocating if necessary.
75    #[inline(always)]
76    pub fn new(text: impl AsRef<str>) -> SmolStr {
77        SmolStr(Repr::new(text.as_ref()))
78    }
79
80    /// Returns a `&str` slice of this `SmolStr`.
81    #[inline(always)]
82    pub fn as_str(&self) -> &str {
83        self.0.as_str()
84    }
85
86    /// Returns the length of `self` in bytes.
87    #[inline(always)]
88    pub fn len(&self) -> usize {
89        self.0.len()
90    }
91
92    /// Returns `true` if `self` has a length of zero bytes.
93    #[inline(always)]
94    pub fn is_empty(&self) -> bool {
95        self.0.is_empty()
96    }
97
98    /// Returns `true` if `self` is heap-allocated.
99    #[inline(always)]
100    pub const fn is_heap_allocated(&self) -> bool {
101        matches!(self.0, Repr::Heap(..))
102    }
103}
104
105impl Clone for SmolStr {
106    #[inline]
107    fn clone(&self) -> Self {
108        // hint for faster inline / slower heap clones
109        #[cold]
110        #[inline(never)]
111        fn cold_clone(v: &SmolStr) -> SmolStr {
112            SmolStr(v.0.clone())
113        }
114
115        if self.is_heap_allocated() {
116            return cold_clone(self);
117        }
118
119        // SAFETY: We verified that the payload of `Repr` is a POD
120        unsafe { core::ptr::read(self as *const SmolStr) }
121    }
122}
123
124impl Default for SmolStr {
125    #[inline(always)]
126    fn default() -> SmolStr {
127        SmolStr(Repr::Inline { len: InlineSize::_V0, buf: [0; INLINE_CAP] })
128    }
129}
130
131impl ops::Deref for SmolStr {
132    type Target = str;
133
134    #[inline(always)]
135    fn deref(&self) -> &str {
136        self.as_str()
137    }
138}
139
140// region: PartialEq implementations
141
142impl Eq for SmolStr {}
143impl PartialEq<SmolStr> for SmolStr {
144    fn eq(&self, other: &SmolStr) -> bool {
145        self.0.ptr_eq(&other.0) || self.as_str() == other.as_str()
146    }
147}
148
149impl PartialEq<str> for SmolStr {
150    #[inline(always)]
151    fn eq(&self, other: &str) -> bool {
152        self.as_str() == other
153    }
154}
155
156impl PartialEq<SmolStr> for str {
157    #[inline(always)]
158    fn eq(&self, other: &SmolStr) -> bool {
159        other == self
160    }
161}
162
163impl<'a> PartialEq<&'a str> for SmolStr {
164    #[inline(always)]
165    fn eq(&self, other: &&'a str) -> bool {
166        self == *other
167    }
168}
169
170impl PartialEq<SmolStr> for &str {
171    #[inline(always)]
172    fn eq(&self, other: &SmolStr) -> bool {
173        *self == other
174    }
175}
176
177impl PartialEq<String> for SmolStr {
178    #[inline(always)]
179    fn eq(&self, other: &String) -> bool {
180        self.as_str() == other
181    }
182}
183
184impl PartialEq<SmolStr> for String {
185    #[inline(always)]
186    fn eq(&self, other: &SmolStr) -> bool {
187        other == self
188    }
189}
190
191impl<'a> PartialEq<&'a String> for SmolStr {
192    #[inline(always)]
193    fn eq(&self, other: &&'a String) -> bool {
194        self == *other
195    }
196}
197
198impl PartialEq<SmolStr> for &String {
199    #[inline(always)]
200    fn eq(&self, other: &SmolStr) -> bool {
201        *self == other
202    }
203}
204// endregion: PartialEq implementations
205
206impl Ord for SmolStr {
207    fn cmp(&self, other: &SmolStr) -> Ordering {
208        self.as_str().cmp(other.as_str())
209    }
210}
211
212impl PartialOrd for SmolStr {
213    fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {
214        Some(self.cmp(other))
215    }
216}
217
218impl hash::Hash for SmolStr {
219    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
220        self.as_str().hash(hasher);
221    }
222}
223
224impl fmt::Debug for SmolStr {
225    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
226        fmt::Debug::fmt(self.as_str(), f)
227    }
228}
229
230impl fmt::Display for SmolStr {
231    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
232        fmt::Display::fmt(self.as_str(), f)
233    }
234}
235
236impl iter::FromIterator<char> for SmolStr {
237    fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {
238        from_char_iter(iter.into_iter())
239    }
240}
241
242#[inline]
243fn from_char_iter(iter: impl Iterator<Item = char>) -> SmolStr {
244    from_buf_and_chars([0; _], 0, iter)
245}
246
247fn from_buf_and_chars(
248    mut buf: [u8; INLINE_CAP],
249    buf_len: usize,
250    mut iter: impl Iterator<Item = char>,
251) -> SmolStr {
252    let min_size = iter.size_hint().0 + buf_len;
253    if min_size > INLINE_CAP {
254        let heap: String =
255            core::str::from_utf8(&buf[..buf_len]).unwrap().chars().chain(iter).collect();
256        if heap.len() <= INLINE_CAP {
257            // size hint lied
258            return SmolStr::new_inline(&heap);
259        }
260        return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
261    }
262    let mut len = buf_len;
263    while let Some(ch) = iter.next() {
264        let size = ch.len_utf8();
265        if size + len > INLINE_CAP {
266            let (min_remaining, _) = iter.size_hint();
267            let mut heap = String::with_capacity(size + len + min_remaining);
268            heap.push_str(core::str::from_utf8(&buf[..len]).unwrap());
269            heap.push(ch);
270            heap.extend(iter);
271            return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
272        }
273        ch.encode_utf8(&mut buf[len..]);
274        len += size;
275    }
276    SmolStr(Repr::Inline {
277        // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
278        // as we otherwise return early.
279        len: unsafe { InlineSize::transmute_from_u8(len as u8) },
280        buf,
281    })
282}
283
284fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
285where
286    T: AsRef<str>,
287    String: iter::Extend<T>,
288{
289    let mut len = 0;
290    let mut buf = [0u8; INLINE_CAP];
291    while let Some(slice) = iter.next() {
292        let slice = slice.as_ref();
293        let size = slice.len();
294        if size + len > INLINE_CAP {
295            let mut heap = String::with_capacity(size + len);
296            heap.push_str(core::str::from_utf8(&buf[..len]).unwrap());
297            heap.push_str(slice);
298            heap.extend(iter);
299            return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
300        }
301        buf[len..][..size].copy_from_slice(slice.as_bytes());
302        len += size;
303    }
304    SmolStr(Repr::Inline {
305        // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
306        // as we otherwise return early.
307        len: unsafe { InlineSize::transmute_from_u8(len as u8) },
308        buf,
309    })
310}
311
312impl iter::FromIterator<String> for SmolStr {
313    fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {
314        build_from_str_iter(iter.into_iter())
315    }
316}
317
318impl<'a> iter::FromIterator<&'a String> for SmolStr {
319    fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {
320        SmolStr::from_iter(iter.into_iter().map(|x| x.as_str()))
321    }
322}
323
324impl<'a> iter::FromIterator<&'a str> for SmolStr {
325    fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {
326        build_from_str_iter(iter.into_iter())
327    }
328}
329
330impl AsRef<str> for SmolStr {
331    #[inline(always)]
332    fn as_ref(&self) -> &str {
333        self.as_str()
334    }
335}
336
337impl AsRef<[u8]> for SmolStr {
338    #[inline(always)]
339    fn as_ref(&self) -> &[u8] {
340        self.as_str().as_bytes()
341    }
342}
343
344#[cfg(feature = "std")]
345#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
346impl AsRef<std::ffi::OsStr> for SmolStr {
347    #[inline(always)]
348    fn as_ref(&self) -> &std::ffi::OsStr {
349        AsRef::<std::ffi::OsStr>::as_ref(self.as_str())
350    }
351}
352
353#[cfg(feature = "std")]
354#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
355impl AsRef<std::path::Path> for SmolStr {
356    #[inline(always)]
357    fn as_ref(&self) -> &std::path::Path {
358        AsRef::<std::path::Path>::as_ref(self.as_str())
359    }
360}
361
362impl From<&str> for SmolStr {
363    #[inline]
364    fn from(s: &str) -> SmolStr {
365        SmolStr::new(s)
366    }
367}
368
369impl From<&mut str> for SmolStr {
370    #[inline]
371    fn from(s: &mut str) -> SmolStr {
372        SmolStr::new(s)
373    }
374}
375
376impl From<&String> for SmolStr {
377    #[inline]
378    fn from(s: &String) -> SmolStr {
379        SmolStr::new(s)
380    }
381}
382
383impl From<String> for SmolStr {
384    #[inline(always)]
385    fn from(text: String) -> Self {
386        Self::new(text)
387    }
388}
389
390impl From<Box<str>> for SmolStr {
391    #[inline]
392    fn from(s: Box<str>) -> SmolStr {
393        SmolStr::new(s)
394    }
395}
396
397impl From<Arc<str>> for SmolStr {
398    #[inline]
399    fn from(s: Arc<str>) -> SmolStr {
400        let repr = Repr::new_on_stack(s.as_ref()).unwrap_or(Repr::Heap(s));
401        Self(repr)
402    }
403}
404
405impl<'a> From<Cow<'a, str>> for SmolStr {
406    #[inline]
407    fn from(s: Cow<'a, str>) -> SmolStr {
408        SmolStr::new(s)
409    }
410}
411
412impl From<SmolStr> for Arc<str> {
413    #[inline(always)]
414    fn from(text: SmolStr) -> Self {
415        match text.0 {
416            Repr::Heap(data) => data,
417            _ => text.as_str().into(),
418        }
419    }
420}
421
422impl From<SmolStr> for String {
423    #[inline(always)]
424    fn from(text: SmolStr) -> Self {
425        text.as_str().into()
426    }
427}
428
429impl Borrow<str> for SmolStr {
430    #[inline(always)]
431    fn borrow(&self) -> &str {
432        self.as_str()
433    }
434}
435
436impl FromStr for SmolStr {
437    type Err = Infallible;
438
439    #[inline]
440    fn from_str(s: &str) -> Result<SmolStr, Self::Err> {
441        Ok(SmolStr::from(s))
442    }
443}
444
445const INLINE_CAP: usize = InlineSize::_V23 as usize;
446const N_NEWLINES: usize = 32;
447const N_SPACES: usize = 128;
448const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n                                                                                                                                ";
449const _: () = {
450    assert!(WS.len() == N_NEWLINES + N_SPACES);
451    assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n');
452    assert!(WS.as_bytes()[N_NEWLINES] == b' ');
453};
454
455/// A [`u8`] with a bunch of niches.
456#[derive(Clone, Copy, Debug, PartialEq)]
457#[repr(u8)]
458enum InlineSize {
459    _V0 = 0,
460    _V1,
461    _V2,
462    _V3,
463    _V4,
464    _V5,
465    _V6,
466    _V7,
467    _V8,
468    _V9,
469    _V10,
470    _V11,
471    _V12,
472    _V13,
473    _V14,
474    _V15,
475    _V16,
476    _V17,
477    _V18,
478    _V19,
479    _V20,
480    _V21,
481    _V22,
482    _V23,
483}
484
485impl InlineSize {
486    /// SAFETY: `value` must be less than or equal to [`INLINE_CAP`]
487    #[inline(always)]
488    const unsafe fn transmute_from_u8(value: u8) -> Self {
489        debug_assert!(value <= InlineSize::_V23 as u8);
490        // SAFETY: The caller is responsible to uphold this invariant
491        unsafe { mem::transmute::<u8, Self>(value) }
492    }
493}
494
495#[derive(Clone, Debug)]
496enum Repr {
497    Inline { len: InlineSize, buf: [u8; INLINE_CAP] },
498    Static(&'static str),
499    Heap(Arc<str>),
500}
501
502impl Repr {
503    /// This function tries to create a new Repr::Inline or Repr::Static
504    /// If it isn't possible, this function returns None
505    fn new_on_stack<T>(text: T) -> Option<Self>
506    where
507        T: AsRef<str>,
508    {
509        let text = text.as_ref();
510
511        let len = text.len();
512        if len <= INLINE_CAP {
513            let mut buf = [0; INLINE_CAP];
514            buf[..len].copy_from_slice(text.as_bytes());
515            return Some(Repr::Inline {
516                // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
517                len: unsafe { InlineSize::transmute_from_u8(len as u8) },
518                buf,
519            });
520        }
521
522        if len <= N_NEWLINES + N_SPACES {
523            let bytes = text.as_bytes();
524            let possible_newline_count = cmp::min(len, N_NEWLINES);
525            let newlines =
526                bytes[..possible_newline_count].iter().take_while(|&&b| b == b'\n').count();
527            let possible_space_count = len - newlines;
528            if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') {
529                let spaces = possible_space_count;
530                let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces];
531                return Some(Repr::Static(substring));
532            }
533        }
534        None
535    }
536
537    fn new(text: &str) -> Self {
538        Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text)))
539    }
540
541    #[inline(always)]
542    fn len(&self) -> usize {
543        match self {
544            Repr::Heap(data) => data.len(),
545            Repr::Static(data) => data.len(),
546            Repr::Inline { len, .. } => *len as usize,
547        }
548    }
549
550    #[inline(always)]
551    fn is_empty(&self) -> bool {
552        match self {
553            Repr::Heap(data) => data.is_empty(),
554            Repr::Static(data) => data.is_empty(),
555            &Repr::Inline { len, .. } => len as u8 == 0,
556        }
557    }
558
559    #[inline]
560    fn as_str(&self) -> &str {
561        match self {
562            Repr::Heap(data) => data,
563            Repr::Static(data) => data,
564            Repr::Inline { len, buf } => {
565                let len = *len as usize;
566                // SAFETY: len is guaranteed to be <= INLINE_CAP
567                let buf = unsafe { buf.get_unchecked(..len) };
568                // SAFETY: buf is guaranteed to be valid utf8 for ..len bytes
569                unsafe { ::core::str::from_utf8_unchecked(buf) }
570            }
571        }
572    }
573
574    fn ptr_eq(&self, other: &Self) -> bool {
575        match (self, other) {
576            (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0),
577            (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(l0, r0),
578            (Self::Inline { len: l_len, buf: l_buf }, Self::Inline { len: r_len, buf: r_buf }) => {
579                l_len == r_len && l_buf == r_buf
580            }
581            _ => false,
582        }
583    }
584}
585
586/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating.
587///
588/// Almost identical to [`ToString`], but converts to `SmolStr` instead.
589pub trait ToSmolStr {
590    fn to_smolstr(&self) -> SmolStr;
591}
592
593/// [`str`] methods producing [`SmolStr`]s.
594pub trait StrExt: private::Sealed {
595    /// Returns the lowercase equivalent of this string slice as a new [`SmolStr`],
596    /// potentially without allocating.
597    ///
598    /// See [`str::to_lowercase`].
599    #[must_use = "this returns a new SmolStr without modifying the original"]
600    fn to_lowercase_smolstr(&self) -> SmolStr;
601
602    /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`],
603    /// potentially without allocating.
604    ///
605    /// See [`str::to_uppercase`].
606    #[must_use = "this returns a new SmolStr without modifying the original"]
607    fn to_uppercase_smolstr(&self) -> SmolStr;
608
609    /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`],
610    /// potentially without allocating.
611    ///
612    /// See [`str::to_ascii_lowercase`].
613    #[must_use = "this returns a new SmolStr without modifying the original"]
614    fn to_ascii_lowercase_smolstr(&self) -> SmolStr;
615
616    /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`],
617    /// potentially without allocating.
618    ///
619    /// See [`str::to_ascii_uppercase`].
620    #[must_use = "this returns a new SmolStr without modifying the original"]
621    fn to_ascii_uppercase_smolstr(&self) -> SmolStr;
622
623    /// Replaces all matches of a &str with another &str returning a new [`SmolStr`],
624    /// potentially without allocating.
625    ///
626    /// See [`str::replace`].
627    #[must_use = "this returns a new SmolStr without modifying the original"]
628    fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr;
629
630    /// Replaces first N matches of a &str with another &str returning a new [`SmolStr`],
631    /// potentially without allocating.
632    ///
633    /// See [`str::replacen`].
634    #[must_use = "this returns a new SmolStr without modifying the original"]
635    fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr;
636}
637
638impl StrExt for str {
639    #[inline]
640    fn to_lowercase_smolstr(&self) -> SmolStr {
641        let len = self.len();
642        if len <= INLINE_CAP {
643            let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase);
644            from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_lowercase()))
645        } else {
646            self.to_lowercase().into()
647        }
648    }
649
650    #[inline]
651    fn to_uppercase_smolstr(&self) -> SmolStr {
652        let len = self.len();
653        if len <= INLINE_CAP {
654            let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase);
655            from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_uppercase()))
656        } else {
657            self.to_uppercase().into()
658        }
659    }
660
661    #[inline]
662    fn to_ascii_lowercase_smolstr(&self) -> SmolStr {
663        let len = self.len();
664        if len <= INLINE_CAP {
665            let mut buf = [0u8; INLINE_CAP];
666            buf[..len].copy_from_slice(self.as_bytes());
667            buf[..len].make_ascii_lowercase();
668            SmolStr(Repr::Inline {
669                // SAFETY: `len` is in bounds
670                len: unsafe { InlineSize::transmute_from_u8(len as u8) },
671                buf,
672            })
673        } else {
674            self.to_ascii_lowercase().into()
675        }
676    }
677
678    #[inline]
679    fn to_ascii_uppercase_smolstr(&self) -> SmolStr {
680        let len = self.len();
681        if len <= INLINE_CAP {
682            let mut buf = [0u8; INLINE_CAP];
683            buf[..len].copy_from_slice(self.as_bytes());
684            buf[..len].make_ascii_uppercase();
685            SmolStr(Repr::Inline {
686                // SAFETY: `len` is in bounds
687                len: unsafe { InlineSize::transmute_from_u8(len as u8) },
688                buf,
689            })
690        } else {
691            self.to_ascii_uppercase().into()
692        }
693    }
694
695    #[inline]
696    fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr {
697        self.replacen_smolstr(from, to, usize::MAX)
698    }
699
700    #[inline]
701    fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr {
702        // Fast path for replacing a single ASCII character with another inline.
703        if let [from_u8] = from.as_bytes()
704            && let [to_u8] = to.as_bytes()
705        {
706            return if self.len() <= count {
707                // SAFETY: `from_u8` & `to_u8` are ascii
708                unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) }
709            } else {
710                unsafe {
711                    replacen_1_ascii(self, |b| {
712                        if b == from_u8 && count != 0 {
713                            count -= 1;
714                            *to_u8
715                        } else {
716                            *b
717                        }
718                    })
719                }
720            };
721        }
722
723        let mut result = SmolStrBuilder::new();
724        let mut last_end = 0;
725        for (start, part) in self.match_indices(from).take(count) {
726            // SAFETY: `start` is guaranteed to be within the bounds of `self` as per
727            // `match_indices` and last_end is always less than or equal to `start`
728            result.push_str(unsafe { self.get_unchecked(last_end..start) });
729            result.push_str(to);
730            last_end = start + part.len();
731        }
732        // SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is
733        // always less than or equal to `self.len()`
734        result.push_str(unsafe { self.get_unchecked(last_end..self.len()) });
735        SmolStr::from(result)
736    }
737}
738
739/// SAFETY: `map` fn must only replace ascii with ascii or return unchanged bytes.
740#[inline]
741unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr {
742    if src.len() <= INLINE_CAP {
743        let mut buf = [0u8; INLINE_CAP];
744        for (idx, b) in src.as_bytes().iter().enumerate() {
745            buf[idx] = map(b);
746        }
747        SmolStr(Repr::Inline {
748            // SAFETY: `len` is in bounds
749            len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) },
750            buf,
751        })
752    } else {
753        let out = src.as_bytes().iter().map(map).collect();
754        // SAFETY: We replaced ascii with ascii on valid utf8 strings.
755        unsafe { String::from_utf8_unchecked(out).into() }
756    }
757}
758
759/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23.
760#[inline]
761fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) {
762    // Process the input in chunks of 16 bytes to enable auto-vectorization.
763    // Previously the chunk size depended on the size of `usize`,
764    // but on 32-bit platforms with sse or neon is also the better choice.
765    // The only downside on other platforms would be a bit more loop-unrolling.
766    const N: usize = 16;
767
768    debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings");
769
770    let mut slice = s.as_bytes();
771    let mut out = [0u8; INLINE_CAP];
772    let mut out_slice = &mut out[..slice.len()];
773    let mut is_ascii = [false; N];
774
775    while slice.len() >= N {
776        // SAFETY: checked in loop condition
777        let chunk = unsafe { slice.get_unchecked(..N) };
778        // SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets
779        let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };
780
781        for j in 0..N {
782            is_ascii[j] = chunk[j] <= 127;
783        }
784
785        // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk
786        // size gives the best result, specifically a pmovmsk instruction on x86.
787        // See https://github.com/llvm/llvm-project/issues/96395 for why llvm currently does not
788        // currently recognize other similar idioms.
789        if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
790            break;
791        }
792
793        for j in 0..N {
794            out_chunk[j] = convert(&chunk[j]);
795        }
796
797        slice = unsafe { slice.get_unchecked(N..) };
798        out_slice = unsafe { out_slice.get_unchecked_mut(N..) };
799    }
800
801    // handle the remainder as individual bytes
802    while !slice.is_empty() {
803        let byte = slice[0];
804        if byte > 127 {
805            break;
806        }
807        // SAFETY: out_slice has at least same length as input slice
808        unsafe {
809            *out_slice.get_unchecked_mut(0) = convert(&byte);
810        }
811        slice = unsafe { slice.get_unchecked(1..) };
812        out_slice = unsafe { out_slice.get_unchecked_mut(1..) };
813    }
814
815    unsafe {
816        // SAFETY: we know this is a valid char boundary
817        // since we only skipped over leading ascii bytes
818        let rest = core::str::from_utf8_unchecked(slice);
819        (out, rest)
820    }
821}
822
823impl<T> ToSmolStr for T
824where
825    T: fmt::Display + ?Sized,
826{
827    fn to_smolstr(&self) -> SmolStr {
828        format_smolstr!("{}", self)
829    }
830}
831
832mod private {
833    /// No downstream impls allowed.
834    pub trait Sealed {}
835    impl Sealed for str {}
836}
837
838/// Formats arguments to a [`SmolStr`], potentially without allocating.
839///
840/// See [`alloc::format!`] or [`format_args!`] for syntax documentation.
841#[macro_export]
842macro_rules! format_smolstr {
843    ($($tt:tt)*) => {{
844        let mut w = $crate::SmolStrBuilder::new();
845        ::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error");
846        w.finish()
847    }};
848}
849
850/// A builder that can be used to efficiently build a [`SmolStr`].
851///
852/// This won't allocate if the final string fits into the inline buffer.
853#[derive(Clone, Default, Debug, PartialEq, Eq)]
854pub struct SmolStrBuilder(SmolStrBuilderRepr);
855
856#[derive(Clone, Debug, PartialEq, Eq)]
857enum SmolStrBuilderRepr {
858    Inline { len: usize, buf: [u8; INLINE_CAP] },
859    Heap(String),
860}
861
862impl Default for SmolStrBuilderRepr {
863    #[inline]
864    fn default() -> Self {
865        SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 }
866    }
867}
868
869impl SmolStrBuilder {
870    /// Creates a new empty [`SmolStrBuilder`].
871    #[must_use]
872    pub const fn new() -> Self {
873        Self(SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 })
874    }
875
876    /// Builds a [`SmolStr`] from `self`.
877    #[must_use]
878    pub fn finish(&self) -> SmolStr {
879        SmolStr(match &self.0 {
880            &SmolStrBuilderRepr::Inline { len, buf } => {
881                debug_assert!(len <= INLINE_CAP);
882                Repr::Inline {
883                    // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize`
884                    len: unsafe { InlineSize::transmute_from_u8(len as u8) },
885                    buf,
886                }
887            }
888            SmolStrBuilderRepr::Heap(heap) => Repr::new(heap),
889        })
890    }
891
892    /// Appends the given [`char`] to the end of `self`'s buffer.
893    pub fn push(&mut self, c: char) {
894        match &mut self.0 {
895            SmolStrBuilderRepr::Inline { len, buf } => {
896                let char_len = c.len_utf8();
897                let new_len = *len + char_len;
898                if new_len <= INLINE_CAP {
899                    c.encode_utf8(&mut buf[*len..]);
900                    *len += char_len;
901                } else {
902                    let mut heap = String::with_capacity(new_len);
903                    // copy existing inline bytes over to the heap
904                    // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes
905                    unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) };
906                    heap.push(c);
907                    self.0 = SmolStrBuilderRepr::Heap(heap);
908                }
909            }
910            SmolStrBuilderRepr::Heap(h) => h.push(c),
911        }
912    }
913
914    /// Appends a given string slice onto the end of `self`'s buffer.
915    pub fn push_str(&mut self, s: &str) {
916        match &mut self.0 {
917            SmolStrBuilderRepr::Inline { len, buf } => {
918                let old_len = *len;
919                *len += s.len();
920
921                // if the new length will fit on the stack (even if it fills it entirely)
922                if *len <= INLINE_CAP {
923                    buf[old_len..*len].copy_from_slice(s.as_bytes());
924                    return; // skip the heap push below
925                }
926
927                let mut heap = String::with_capacity(*len);
928
929                // copy existing inline bytes over to the heap
930                // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes
931                unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) };
932                heap.push_str(s);
933                self.0 = SmolStrBuilderRepr::Heap(heap);
934            }
935            SmolStrBuilderRepr::Heap(heap) => heap.push_str(s),
936        }
937    }
938}
939
940impl fmt::Write for SmolStrBuilder {
941    #[inline]
942    fn write_str(&mut self, s: &str) -> fmt::Result {
943        self.push_str(s);
944        Ok(())
945    }
946}
947
948impl From<SmolStrBuilder> for SmolStr {
949    fn from(value: SmolStrBuilder) -> Self {
950        value.finish()
951    }
952}
953
954#[cfg(feature = "arbitrary")]
955#[cfg_attr(docsrs, doc(cfg(feature = "arbitrary")))]
956impl<'a> arbitrary::Arbitrary<'a> for SmolStr {
957    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {
958        let s = <&str>::arbitrary(u)?;
959        Ok(SmolStr::new(s))
960    }
961}
962
963#[cfg(feature = "borsh")]
964#[cfg_attr(docsrs, doc(cfg(feature = "borsh")))]
965mod borsh;
966#[cfg(feature = "serde")]
967#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
968mod serde;
969
970#[test]
971fn from_buf_and_chars_size_hinted_heap() {
972    let str = from_buf_and_chars(
973        *b"abcdefghijklmnopqr00000",
974        18,
975        "_0x1x2x3x4x5x6x7x8x9x10x11x12x13".chars(),
976    );
977
978    assert_eq!(str, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13");
979}