tiny_str/
lib.rs

1/*  Copyright (C) 2025 Saúl Valdelvira
2 *
3 *  This program is free software: you can redistribute it and/or modify
4 *  it under the terms of the GNU General Public License as published by
5 *  the Free Software Foundation, version 3.
6 *
7 *  This program is distributed in the hope that it will be useful,
8 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *  GNU General Public License for more details.
11 *
12 *  You should have received a copy of the GNU General Public License
13 *  along with this program.  If not, see <https://www.gnu.org/licenses/>. */
14
15//! Tiny string
16//!
17//! A string that can store a small amount of bytes on the stack.
18//!
19//! This struct provides a string-like API, but performs SSO (Small String Optimization)
20//! This means that a `TinyString<N>` stores up to N bytes on the stack.
21//! If the string grows bigger than that, it moves the contents to the heap.
22//!
23//! # Example
24//! ```
25//! use tiny_str::TinyString;
26//!
27//! let mut s = TinyString::<10>::new();
28//!
29//! for (i, c) in (b'0'..=b'9').enumerate() {
30//!     s.push(c as char);
31//!     assert_eq!(s.len(), i + 1);
32//! }
33//!
34//! // Up to this point, no heap allocations are needed.
35//! // The string is stored on the stack.
36//!
37//! s.push_str("abc"); // This moves the string to the heap
38//!
39//! assert_eq!(&s[..], "0123456789abc")
40//! ```
41//!
42//! # Memory layout
43//! TinyString is based on [TinyVec], just like [alloc::string::String] is based
44//! on [alloc::vec::Vec].
45//!
46//! You can read the [tiny_vec] crate documentation to learn about the internal
47//! representation of the data.
48
49#![no_std]
50
51#![cfg_attr(feature = "use-nightly-features", feature(extend_one))]
52
53use core::fmt::{self, Display};
54use core::ops::{Bound, Deref, DerefMut, Range, RangeBounds};
55use core::str::{self, FromStr, Utf8Error};
56
57extern crate alloc;
58use alloc::vec::Vec;
59use alloc::boxed::Box;
60
61use tiny_vec::TinyVec;
62pub mod iter;
63
64pub mod drain;
65
/// Default value of the `N` parameter of [TinyString]: whatever
/// `tiny_vec::n_elements_for_stack::<u8>()` evaluates to.
const MAX_N_STACK_ELEMENTS: usize = tiny_vec::n_elements_for_stack::<u8>();
67
/// A string that can store a small amount of bytes on the stack.
///
/// `N` is forwarded to the backing [TinyVec]; when omitted, it defaults to
/// `MAX_N_STACK_ELEMENTS`.
pub struct TinyString<const N: usize = MAX_N_STACK_ELEMENTS> {
    // Backing byte buffer. The methods of this type treat its contents as UTF-8.
    buf: TinyVec<u8, N>,
}
72
73impl<const N: usize> TinyString<N> {
74    fn slice_range<R>(&self, range: R, len: usize) -> Range<usize>
75    where
76        R: RangeBounds<usize>
77    {
78        let start = match range.start_bound() {
79            Bound::Included(n) => *n,
80            Bound::Excluded(n) => *n + 1,
81            Bound::Unbounded => 0,
82        };
83
84        let end = match range.end_bound() {
85            Bound::Included(n) => *n + 1,
86            Bound::Excluded(n) => *n,
87            Bound::Unbounded => len,
88        };
89
90        assert!(start <= end);
91        assert!(end <= len);
92        assert!(self.is_char_boundary(start));
93        assert!(self.is_char_boundary(end));
94
95        Range { start, end }
96    }
97}
98
99impl<const N: usize> TinyString<N> {
100
101    /// Creates a new [TinyString]
102    #[inline]
103    pub const fn new() -> Self {
104        Self { buf: TinyVec::new() }
105    }
106
107    /// Creates a new [TinyString] with the given capacity
108    pub fn with_capacity(cap: usize) -> Self {
109        Self { buf: TinyVec::with_capacity(cap) }
110    }
111
112    /// Creates a new [TinyString] from the given utf8 buffer.
113    ///
114    /// # Errors
115    /// If the byte buffer contains invalid uft8
116    pub fn from_utf8(utf8: TinyVec<u8, N>) -> Result<Self,Utf8Error> {
117        str::from_utf8(utf8.as_slice())?;
118        Ok(Self { buf: utf8 })
119    }
120
121    /// Creates a new [TinyString] from the given utf8 buffer.
122    ///
123    /// # Safety
124    /// The caller must ensure that the given contains valid utf8
125    #[inline(always)]
126    pub const unsafe fn from_utf8_unchecked(utf8: TinyVec<u8, N>) -> Self {
127        Self { buf: utf8 }
128    }
129
130    /// Creates a new `TinyString` by repeating the given `slice` `n` times.
131    ///
132    /// # Panics
133    /// If the capacity requires overflows `isize::MAX`
134    ///
135    /// # Example
136    /// ```
137    /// use tiny_str::TinyString;
138    /// let s = TinyString::<10>::repeat("abc", 5);
139    /// assert_eq!(s.as_str(), "abcabcabcabcabc");
140    /// ```
141    pub fn repeat(slice: &str, n: usize) -> Self {
142        let len = slice.len() * n;
143        let mut s = Self::with_capacity(len);
144        let bytes = slice.as_bytes();
145        for _ in 0..n {
146            s.buf.extend_from_slice_copied(bytes);
147        }
148        s
149    }
150
151    /// Returns the number of elements inside this string
152    #[inline]
153    pub const fn len(&self) -> usize { self.buf.len() }
154
155    /// Returns true if the string is empty
156    #[inline]
157    pub const fn is_empty(&self) -> bool { self.buf.is_empty() }
158
159    /// Returns the allocated capacity for this string
160    #[inline]
161    pub const fn capacity(&self) -> usize { self.buf.capacity() }
162
163    /// Returns a str slice
164    #[inline]
165    pub const fn as_str(&self) -> &str {
166        unsafe { str::from_utf8_unchecked(self.buf.as_slice()) }
167    }
168
169    /// Returns a mutable str slice
170    #[inline]
171    pub const fn as_mut_str(&mut self) -> &mut str {
172        unsafe { str::from_utf8_unchecked_mut(self.buf.as_mut_slice()) }
173    }
174
175    /// Returns a const pointer to the buffer
176    ///
177    /// This method shadows [str::as_ptr], to avoid a deref
178    #[inline]
179    pub const fn as_ptr(&self) -> *const u8 {
180        self.buf.as_ptr()
181    }
182
183    /// Returns a mutable pointer to the buffer
184    ///
185    /// This method shadows [str::as_mut_ptr], to avoid a deref
186    #[inline]
187    pub const fn as_mut_ptr(&mut self) -> *mut u8 {
188        self.buf.as_mut_ptr()
189    }
190
191    /// Returns the string as a byte slice
192    #[inline]
193    pub const fn as_bytes(&self) -> &[u8] {
194        self.buf.as_slice()
195    }
196
197    /// Returns the string as a byte slice
198    ///
199    /// Returns the string as a mutable bytes slice
200    ///
201    /// # Safety
202    /// Modifying this byte slice is dangerous, because it can leave the
203    /// buffer on an inconsistent state.
204    /// Strings must be valid UTF8. So manually changing the byte contents
205    /// of the string could lead to bugs.
206    ///
207    /// # Example
208    /// ```
209    /// use tiny_str::TinyString;
210    ///
211    /// let mut s = TinyString::<10>::from("hello");
212    /// unsafe {
213    ///     let slice = s.as_mut_bytes();
214    ///     assert_eq!(&[104, 101, 108, 108, 111][..], &slice[..]);
215    ///     slice.reverse();
216    /// }
217    /// assert_eq!(s, "olleh");
218    /// ```
219    #[inline]
220    pub const unsafe fn as_mut_bytes(&mut self) -> &mut [u8] {
221        self.buf.as_mut_slice()
222    }
223
224    /// Returns a mutable reference to the contents of this `TinyString`
225    ///
226    /// # Safety
227    /// Modifying this [TinyVec] is dangerous, because it can leave the
228    /// buffer on an inconsistent state.
229    /// Strings must be valid UTF8. So mutating the vector without respecting
230    /// that could lead to bugs.
231    ///
232    /// # Example
233    /// ```
234    /// use tiny_str::TinyString;
235    ///
236    /// let mut s = TinyString::<10>::from("hello");
237    /// unsafe {
238    ///     let vec = s.as_mut_vec();
239    ///     assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]);
240    ///     vec.drain(1..3);
241    /// }
242    /// assert_eq!(s, "hlo");
243    /// ```
244    #[inline]
245    pub const unsafe fn as_mut_vec(&mut self) -> &mut TinyVec<u8, N> {
246        &mut self.buf
247    }
248
249    /// Pushes a character into the string
250    pub fn push(&mut self, c: char) {
251        let len = c.len_utf8();
252        if len == 1 {
253            self.buf.push(c as u8);
254        } else {
255            let mut buf = [0_u8; 4];
256            c.encode_utf8(&mut buf);
257            self.buf.extend_from_slice(&buf[..len]);
258        }
259    }
260
261    /// Returns the last char of this string, if present
262    ///
263    /// # Example
264    /// ```
265    /// use tiny_str::TinyString;
266    ///
267    /// let mut s = TinyString::<10>::new();
268    ///
269    /// s.push_str("abcd");
270    ///
271    /// assert_eq!(s.pop(), Some('d'));
272    /// assert_eq!(s, "abc");
273    /// ```
274    pub fn pop(&mut self) -> Option<char> {
275        let c = self.chars().next_back()?;
276        let new_len = self.len() - c.len_utf8();
277        unsafe {
278            self.buf.set_len(new_len);
279        }
280        Some(c)
281    }
282
283    /// Pushes a str slice into this string
284    #[inline]
285    pub fn push_str(&mut self, s: &str) {
286        self.buf.extend_from_slice_copied(s.as_bytes());
287    }
288
289    /// Shrinks the capacity of this string to fit exactly it's length
290    #[inline]
291    pub fn shrink_to_fit(&mut self) {
292        self.buf.shrink_to_fit();
293    }
294
295    /// Clears the string
296    ///
297    /// # Example
298    /// ```
299    /// use tiny_str::TinyString;
300    ///
301    /// let mut s: TinyString<5> = TinyString::from("Hello");
302    /// s.clear();
303    ///
304    /// assert!(s.is_empty());
305    /// assert_eq!(s.as_str(), "");
306    /// ```
307    #[inline]
308    pub fn clear(&mut self) {
309        self.buf.clear();
310    }
311
312    /// Reserves space for, at least, n bytes
313    #[inline]
314    pub fn reserve(&mut self, n: usize) {
315        self.buf.reserve(n);
316    }
317
318    /// Reserves space for exactly n more bytes
319    #[inline]
320    pub fn reserve_exact(&mut self, n: usize) {
321        self.buf.reserve_exact(n);
322    }
323
324    /// Converts this TinyString into a boxed str
325    ///
326    /// # Example
327    /// ```
328    /// use tiny_str::TinyString;
329    ///
330    /// let mut s = TinyString::<10>::new();
331    /// s.push_str("abc");
332    ///
333    /// let b = s.into_boxed_str();
334    /// assert_eq!(&*b, "abc");
335    /// ```
336    pub fn into_boxed_str(self) -> Box<str> {
337        let b = self.buf.into_boxed_slice();
338        unsafe { alloc::str::from_boxed_utf8_unchecked(b) }
339    }
340
341    /// Copies the slice from the given range to the back
342    /// of this string.
343    ///
344    /// # Panics
345    /// - If the range is invalid for [0, self.len)
346    /// - If either the start or the end of the range fall
347    ///   outside a char boundary
348    ///
349    /// # Example
350    /// ```
351    /// use tiny_str::TinyString;
352    ///
353    /// let mut s = TinyString::<10>::from("abcdefg");
354    ///
355    /// s.extend_from_within(3..=5);
356    ///
357    /// assert_eq!(s, "abcdefgdef");
358    /// ```
359    pub fn extend_from_within<R>(&mut self, range: R)
360    where
361        R: RangeBounds<usize>
362    {
363        let Range { start, end } = self.slice_range(range, self.len());
364        self.buf.extend_from_within_copied(start..end);
365    }
366
    /// Consumes and leaks the `TinyString`, returning a mutable reference to the contents,
    /// `&'a mut str`.
    ///
    /// This method shrinks the buffer, and moves it to the heap in case it lived
    /// on the stack.
    ///
    /// This function is mainly useful for data that lives for the remainder of
    /// the program's life. Dropping the returned reference will cause a memory
    /// leak.
    ///
    /// # Example
    /// ```
    /// let x = tiny_str::TinyString::<10>::from("ABCDEFG");
    ///
    /// let static_ref: &'static mut str = x.leak();
    /// static_ref.make_ascii_lowercase();
    ///
    /// assert_eq!(static_ref, "abcdefg");
    /// # // FIXME(https://github.com/rust-lang/miri/issues/3670):
    /// # // use -Zmiri-disable-leak-check instead of unleaking in tests meant to leak.
    /// # drop(unsafe{Box::from_raw(static_ref)})
    /// ```
    pub fn leak<'a>(mut self) -> &'a mut str {
        // Prepare the buffer before leaking it: first make sure it lives on
        // the heap, then drop any spare capacity.
        self.buf.move_to_heap_exact();
        self.buf.shrink_to_fit_heap_only();
        unsafe {
            let bytes = self.buf.leak();
            // The leaked bytes are this string's own contents, so they are
            // reinterpreted as a str without validating them again.
            str::from_utf8_unchecked_mut(bytes)
        }
    }
397
398    /// Splits the string into two at the given byte index.
399    ///
400    /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
401    /// the returned `String` contains bytes `[at, len)`. `at` must be on the
402    /// boundary of a UTF-8 code point.
403    ///
404    /// Note that the capacity of `self` does not change.
405    ///
406    /// # Panics
407    ///
408    /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last
409    /// code point of the string.
410    ///
411    /// # Examples
412    /// ```
413    /// let mut hello = tiny_str::TinyString::<8>::from("Hello, World!");
414    /// let world = hello.split_off(7);
415    /// assert_eq!(hello, "Hello, ");
416    /// assert_eq!(world, "World!");
417    /// ```
418    #[inline]
419    #[must_use = "use `.truncate()` if you don't need the other half"]
420    pub fn split_off(&mut self, at: usize) -> TinyString<N> {
421        assert!(self.is_char_boundary(at));
422        let other = self.buf.split_off(at);
423        unsafe { TinyString::from_utf8_unchecked(other) }
424    }
425
426    /// Shortens this `TinyString` to the specified length.
427    ///
428    /// If `new_len` is greater than or equal to the string's current length, this has no
429    /// effect.
430    ///
431    /// Note that this method has no effect on the allocated capacity
432    /// of the string
433    ///
434    /// # Panics
435    ///
436    /// Panics if `new_len` does not lie on a [`char`] boundary.
437    ///
438    /// # Example
439    /// ```
440    /// let mut s = tiny_str::TinyString::<6>::from("hello");
441    ///
442    /// s.truncate(2);
443    ///
444    /// assert_eq!(s, "he");
445    /// ```
446    pub fn truncate(&mut self, new_len: usize) {
447        assert!(self.is_char_boundary(new_len));
448        self.buf.truncate(new_len);
449    }
450
451    /// Inserts a character into this `TinyString` at a byte position.
452    ///
453    /// This is an *O*(*n*) operation as it requires copying every element in the
454    /// buffer.
455    ///
456    /// # Panics
457    ///
458    /// Panics if `index` is larger than the `TinyString`'s length, or if it does not
459    /// lie on a [`char`] boundary.
460    ///
461    /// # Example
462    /// ```
463    /// let mut s = tiny_str::TinyString::<10>::from("Hello world :)");
464    ///
465    /// s.insert(5, '@');
466    ///
467    /// assert_eq!(s, "Hello@ world :)");
468    /// ```
469    pub fn insert(&mut self, index: usize, ch: char) {
470        assert!(self.is_char_boundary(index));
471        let mut buf = [0; 4];
472        ch.encode_utf8(&mut buf);
473        let len = ch.len_utf8();
474        self.buf.insert_slice(index, &buf[..len]).unwrap_or_else(|_| {
475            unreachable!("We've checked the index in the assertion above")
476        })
477    }
478
479    /// Inserts a string slice into this `TinyString` at a byte position.
480    ///
481    /// This is an *O*(*n*) operation as it requires copying every element in the
482    /// buffer.
483    ///
484    /// # Panics
485    ///
486    /// Panics if `index` is larger than the `TinyString`'s length, or if it does not
487    /// lie on a [`char`] boundary.
488    ///
489    /// # Example
490    /// ```
491    /// let mut s = tiny_str::TinyString::<8>::from("Heworld");
492    ///
493    /// s.insert_str(2, "llo ");
494    ///
495    /// assert_eq!("Hello world", s);
496    /// ```
497    pub fn insert_str(&mut self, index: usize, s: &str) {
498        assert!(self.is_char_boundary(index));
499        self.buf.insert_slice(index, s.as_bytes()).unwrap_or_else(|_| {
500            unreachable!("We've checked the index in the assertion above")
501        })
502    }
503}
504
505impl<const N: usize> Default for TinyString<N> {
506    fn default() -> Self {
507        Self::new()
508    }
509}
510
511impl<const N: usize> Deref for TinyString<N> {
512    type Target = str;
513
514    fn deref(&self) -> &Self::Target {
515        self.as_str()
516    }
517}
518
519impl<const N: usize> DerefMut for TinyString<N> {
520    fn deref_mut(&mut self) -> &mut Self::Target {
521        self.as_mut_str()
522    }
523}
524
525impl<const N: usize> From<&str> for TinyString<N> {
526    fn from(value: &str) -> Self {
527        let mut s = Self::with_capacity(value.len());
528        s.push_str(value);
529        s
530    }
531}
532
533impl<const N: usize> TryFrom<&[u8]> for TinyString<N> {
534    type Error = Utf8Error;
535
536    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
537        str::from_utf8(value)?;
538        Ok(unsafe { Self::from_utf8_unchecked(TinyVec::from_slice_copied(value)) })
539    }
540}
541
542impl<const N: usize> TryFrom<TinyVec<u8, N>> for TinyString<N> {
543    type Error = Utf8Error;
544
545    fn try_from(value: TinyVec<u8, N>) -> Result<Self, Self::Error> {
546        Self::from_utf8(value)
547    }
548}
549
550impl<const N: usize> TryFrom<Vec<u8>> for TinyString<N> {
551    type Error = Utf8Error;
552
553    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
554        str::from_utf8(value.as_slice())?;
555        Ok(unsafe { Self::from_utf8_unchecked(TinyVec::from_vec(value)) })
556    }
557}
558
559impl<const N: usize> From<TinyString<N>> for TinyVec<u8, N> {
560    fn from(value: TinyString<N>) -> Self {
561        value.buf
562    }
563}
564
565impl<const N: usize> From<TinyString<N>> for Vec<u8> {
566    fn from(value: TinyString<N>) -> Self {
567        value.buf.into_vec()
568    }
569}
570
571impl<const N: usize> From<TinyString<N>> for Box<str> {
572    fn from(value: TinyString<N>) -> Self {
573        value.into_boxed_str()
574    }
575}
576
577impl<const N: usize> FromIterator<char> for TinyString<N> {
578    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
579        let mut s = Self::new();
580        s.extend(iter);
581        s
582    }
583}
584
585impl<const N: usize> Extend<char> for TinyString<N> {
586    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
587        let iter = iter.into_iter();
588        let cap = match iter.size_hint() {
589            (_, Some(n)) => n,
590            (n, _) => n,
591        };
592        self.reserve(cap);
593        for c in iter {
594            self.push(c);
595        }
596    }
597
598    #[cfg(feature = "use-nightly-features")]
599    fn extend_one(&mut self, item: char) {
600        self.push(item);
601    }
602}
603
604impl<const N: usize, S> PartialEq<S> for TinyString<N>
605where
606    S: AsRef<str>,
607{
608    fn eq(&self, other: &S) -> bool {
609        self.as_str() == other.as_ref()
610    }
611}
612
613impl<const N: usize> PartialEq<TinyString<N>> for &str {
614    fn eq(&self, other: &TinyString<N>) -> bool {
615        self.as_bytes() == other.as_bytes()
616    }
617}
618
// Marker impl. The `PartialEq<Self>` it requires is provided by the generic
// `PartialEq<S: AsRef<str>>` impl, since `TinyString` implements `AsRef<str>`.
impl<const N: usize> Eq for TinyString<N> { }
620
621impl<const N: usize> AsRef<[u8]> for TinyString<N> {
622    fn as_ref(&self) -> &[u8] {
623        self.as_bytes()
624    }
625}
626
627impl<const N: usize> AsRef<str> for TinyString<N> {
628    fn as_ref(&self) -> &str {
629        self.as_str()
630    }
631}
632
633impl<const N: usize> AsMut<str> for TinyString<N> {
634    fn as_mut(&mut self) -> &mut str {
635        self.as_mut_str()
636    }
637}
638
639impl<const N: usize> fmt::Debug for TinyString<N> {
640    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
641        write!(f, "{:?}", self.bytes())
642    }
643}
644
645impl<const N: usize> Display for TinyString<N> {
646    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
647        write!(f, "{}", self.as_str())
648    }
649}
650
651impl<const N: usize> FromStr for TinyString<N> {
652    type Err = core::convert::Infallible;
653
654    fn from_str(s: &str) -> Result<Self, Self::Err> {
655        Ok(Self::from(s))
656    }
657}
658
659#[cfg(test)]
660mod test;