Skip to main content

planck_noalloc/
smallstr.rs

//! A UTF-8 string backed by [`SmallVec`].
//!
//! [`SmallStr<N>`] stores up to `N` bytes inline on the stack. When the string grows
//! beyond `N` bytes, it spills to a heap-allocated buffer.
//!
//! # Examples
//!
//! ```
//! use planck_noalloc::smallstr::SmallStr;
//!
//! let mut s = SmallStr::<16>::new();
//! s.push_str("hello");
//! s.push(' ');
//! s.push_str("world");
//! assert_eq!(s.as_str(), "hello world");
//! assert!(s.is_inline());
//! ```
18
19use alloc::string::String;
20use core::fmt;
21use core::str;
22
23use crate::smallvec::SmallVec;
24
25/// A UTF-8 string that stores up to `N` bytes inline, spilling to the heap when exceeded.
26#[derive(Clone, Default)]
27pub struct SmallStr<const N: usize> {
28    buf: SmallVec<u8, N>,
29}
30
/// Error produced by [`SmallStr::from_small_vec`] when the supplied bytes are not valid
/// UTF-8.
///
/// This is a unit value: it carries neither the rejected bytes nor an error position.
#[derive(Debug, Clone)]
pub struct FromUtf8Error;

impl fmt::Display for FromUtf8Error {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const MESSAGE: &str = "invalid UTF-8 in SmallStr";
        f.write_str(MESSAGE)
    }
}
40
41impl SmallStr<0> {
42    // This exists so `new()` can be const, but only for the general case below.
43}
44
45impl<const N: usize> SmallStr<N> {
46    /// Creates a new, empty `SmallStr`.
47    #[must_use]
48    pub const fn new() -> Self {
49        Self {
50            buf: SmallVec::new(),
51        }
52    }
53
54    /// Creates a new `SmallStr` with at least the specified byte capacity.
55    #[must_use]
56    pub fn with_capacity(cap: usize) -> Self {
57        Self {
58            buf: SmallVec::with_capacity(cap),
59        }
60    }
61
62    /// Creates a `SmallStr` from a `SmallVec<u8, N>`, validating UTF-8.
63    ///
64    /// # Errors
65    ///
66    /// Returns `Err` if the bytes are not valid UTF-8.
67    pub fn from_small_vec(vec: SmallVec<u8, N>) -> Result<Self, FromUtf8Error> {
68        if str::from_utf8(vec.as_slice()).is_ok() {
69            Ok(Self { buf: vec })
70        } else {
71            Err(FromUtf8Error)
72        }
73    }
74
75    /// Creates a `SmallStr` from a `SmallVec<u8, N>` without checking UTF-8.
76    ///
77    /// # Safety
78    ///
79    /// The bytes must be valid UTF-8.
80    #[must_use]
81    pub unsafe fn from_small_vec_unchecked(vec: SmallVec<u8, N>) -> Self {
82        Self { buf: vec }
83    }
84
85    /// Returns the string as a `&str`.
86    #[must_use]
87    pub fn as_str(&self) -> &str {
88        // SAFETY: All mutating methods maintain the UTF-8 invariant.
89        unsafe { str::from_utf8_unchecked(self.buf.as_slice()) }
90    }
91
92    /// Returns the string as a `&mut str`.
93    #[must_use]
94    pub fn as_mut_str(&mut self) -> &mut str {
95        // SAFETY: All mutating methods maintain the UTF-8 invariant.
96        unsafe { str::from_utf8_unchecked_mut(self.buf.as_mut_slice()) }
97    }
98
99    /// Appends a string slice.
100    pub fn push_str(&mut self, s: &str) {
101        self.buf.extend(s.as_bytes().iter().copied());
102    }
103
104    /// Appends a character.
105    pub fn push(&mut self, ch: char) {
106        let mut buf = [0u8; 4];
107        let s = ch.encode_utf8(&mut buf);
108        self.push_str(s);
109    }
110
111    /// Removes and returns the last character, or `None` if empty.
112    pub fn pop(&mut self) -> Option<char> {
113        let s = self.as_str();
114        let ch = s.chars().next_back()?;
115        let new_len = self.len() - ch.len_utf8();
116        self.buf.truncate(new_len);
117        Some(ch)
118    }
119
120    /// Removes all content.
121    pub fn clear(&mut self) {
122        self.buf.clear();
123    }
124
125    /// Returns the byte length of the string.
126    #[must_use]
127    pub fn len(&self) -> usize {
128        self.buf.len()
129    }
130
131    /// Returns `true` if the string is empty.
132    #[must_use]
133    pub fn is_empty(&self) -> bool {
134        self.buf.is_empty()
135    }
136
137    /// Returns the byte capacity.
138    #[must_use]
139    pub fn capacity(&self) -> usize {
140        self.buf.capacity()
141    }
142
143    /// Returns `true` if the data is stored inline.
144    #[must_use]
145    pub fn is_inline(&self) -> bool {
146        self.buf.is_inline()
147    }
148
149    /// Reserves capacity for at least `additional` more bytes.
150    pub fn reserve(&mut self, additional: usize) {
151        self.buf.reserve(additional);
152    }
153
154    /// Shrinks the backing allocation to fit. May move data back inline.
155    pub fn shrink_to_fit(&mut self) {
156        self.buf.shrink_to_fit();
157    }
158
159    /// Forces the data to the heap if currently inline.
160    pub fn spill(&mut self) {
161        self.buf.spill();
162    }
163
164    /// Returns the underlying bytes as a slice.
165    #[must_use]
166    pub fn as_bytes(&self) -> &[u8] {
167        self.buf.as_slice()
168    }
169
170    /// Consumes the string and returns the underlying `SmallVec<u8, N>`.
171    #[must_use]
172    pub fn into_bytes(self) -> SmallVec<u8, N> {
173        self.buf
174    }
175
176    /// Consumes the string and returns a heap-allocated `String`.
177    #[must_use]
178    pub fn into_string(self) -> String {
179        // SAFETY: We maintain the UTF-8 invariant.
180        unsafe { String::from_utf8_unchecked(self.buf.into_vec()) }
181    }
182
183    /// Truncates the string to `new_len` bytes.
184    ///
185    /// # Panics
186    ///
187    /// Panics if `new_len` does not lie on a UTF-8 character boundary.
188    pub fn truncate(&mut self, new_len: usize) {
189        if new_len < self.len() {
190            assert!(
191                self.as_str().is_char_boundary(new_len),
192                "new_len is not a char boundary"
193            );
194            self.buf.truncate(new_len);
195        }
196    }
197
198    /// Inserts a string slice at `byte_index`.
199    ///
200    /// # Panics
201    ///
202    /// Panics if `byte_index` does not lie on a UTF-8 character boundary
203    /// or is out of bounds.
204    pub fn insert_str(&mut self, byte_index: usize, s: &str) {
205        assert!(
206            self.as_str().is_char_boundary(byte_index),
207            "byte_index is not a char boundary"
208        );
209        for (i, &b) in s.as_bytes().iter().enumerate() {
210            self.buf.insert(byte_index + i, b);
211        }
212    }
213}
214
215impl<const N: usize> fmt::Debug for SmallStr<N> {
216    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
217        fmt::Debug::fmt(self.as_str(), f)
218    }
219}
220
221impl<const N: usize> fmt::Display for SmallStr<N> {
222    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
223        fmt::Display::fmt(self.as_str(), f)
224    }
225}
226
227impl<const N: usize> fmt::Write for SmallStr<N> {
228    fn write_str(&mut self, s: &str) -> fmt::Result {
229        self.push_str(s);
230        Ok(())
231    }
232
233    fn write_char(&mut self, c: char) -> fmt::Result {
234        self.push(c);
235        Ok(())
236    }
237}
238
239impl<const N: usize> core::ops::Deref for SmallStr<N> {
240    type Target = str;
241
242    fn deref(&self) -> &str {
243        self.as_str()
244    }
245}
246
247impl<const N: usize> core::ops::DerefMut for SmallStr<N> {
248    fn deref_mut(&mut self) -> &mut str {
249        self.as_mut_str()
250    }
251}
252
253impl<const N: usize> PartialEq for SmallStr<N> {
254    fn eq(&self, other: &Self) -> bool {
255        self.as_str() == other.as_str()
256    }
257}
258
259impl<const N: usize> Eq for SmallStr<N> {}
260
261impl<const N: usize> PartialOrd for SmallStr<N> {
262    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
263        Some(self.cmp(other))
264    }
265}
266
267impl<const N: usize> Ord for SmallStr<N> {
268    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
269        self.as_str().cmp(other.as_str())
270    }
271}
272
273impl<const N: usize> core::hash::Hash for SmallStr<N> {
274    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
275        self.as_str().hash(state);
276    }
277}
278
279impl<const N: usize> PartialEq<str> for SmallStr<N> {
280    fn eq(&self, other: &str) -> bool {
281        self.as_str() == other
282    }
283}
284
285impl<const N: usize> PartialEq<&str> for SmallStr<N> {
286    fn eq(&self, other: &&str) -> bool {
287        self.as_str() == *other
288    }
289}
290
291impl<const N: usize> PartialEq<String> for SmallStr<N> {
292    fn eq(&self, other: &String) -> bool {
293        self.as_str() == other.as_str()
294    }
295}
296
297impl<const N: usize> From<&str> for SmallStr<N> {
298    fn from(s: &str) -> Self {
299        let mut ss = Self::with_capacity(s.len());
300        ss.push_str(s);
301        ss
302    }
303}
304
305impl<const N: usize> From<String> for SmallStr<N> {
306    fn from(s: String) -> Self {
307        let buf = SmallVec::from(s.into_bytes());
308        Self { buf }
309    }
310}
311
312impl<const N: usize> str::FromStr for SmallStr<N> {
313    type Err = core::convert::Infallible;
314
315    fn from_str(s: &str) -> Result<Self, Self::Err> {
316        Ok(Self::from(s))
317    }
318}
319
320impl<const N: usize> core::ops::Add<&str> for SmallStr<N> {
321    type Output = Self;
322
323    fn add(mut self, rhs: &str) -> Self {
324        self.push_str(rhs);
325        self
326    }
327}
328
329impl<const N: usize> core::ops::AddAssign<&str> for SmallStr<N> {
330    fn add_assign(&mut self, rhs: &str) {
331        self.push_str(rhs);
332    }
333}
334
335impl<const N: usize> Extend<char> for SmallStr<N> {
336    fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
337        for ch in iter {
338            self.push(ch);
339        }
340    }
341}
342
343impl<'a, const N: usize> Extend<&'a str> for SmallStr<N> {
344    fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
345        for s in iter {
346            self.push_str(s);
347        }
348    }
349}
350
351impl<const N: usize> AsRef<str> for SmallStr<N> {
352    fn as_ref(&self) -> &str {
353        self.as_str()
354    }
355}
356
357impl<const N: usize> AsRef<[u8]> for SmallStr<N> {
358    fn as_ref(&self) -> &[u8] {
359        self.as_bytes()
360    }
361}
362
363impl<const N: usize> core::borrow::Borrow<str> for SmallStr<N> {
364    fn borrow(&self) -> &str {
365        self.as_str()
366    }
367}
368
/// Unit tests for [`SmallStr`]: construction, mutation, conversions and trait impls.
#[cfg(test)]
mod tests {
    extern crate alloc;
    use alloc::format;
    use alloc::string::{String, ToString};

    use super::*;

    #[test]
    fn new_is_empty() {
        let s = SmallStr::<16>::new();
        assert!(s.is_empty());
        assert_eq!(s.len(), 0);
        assert_eq!(s.as_str(), "");
    }

    #[test]
    fn push_str_inline() {
        let mut s = SmallStr::<16>::new();
        s.push_str("hello");
        assert_eq!(s.as_str(), "hello");
        assert!(s.is_inline());
    }

    #[test]
    fn push_str_spills() {
        let mut s = SmallStr::<4>::new();
        s.push_str("hello world");
        assert_eq!(s.as_str(), "hello world");
        assert!(!s.is_inline());
    }

    #[test]
    fn push_char() {
        let mut s = SmallStr::<16>::new();
        s.push('h');
        s.push('i');
        assert_eq!(s.as_str(), "hi");
    }

    #[test]
    fn push_multibyte_char() {
        let mut s = SmallStr::<16>::new();
        s.push('\u{1F600}'); // emoji
        assert_eq!(s.len(), 4);
        assert_eq!(s.as_str().chars().next(), Some('\u{1F600}'));
    }

    #[test]
    fn pop_char() {
        let mut s = SmallStr::<16>::from("hello");
        assert_eq!(s.pop(), Some('o'));
        assert_eq!(s.as_str(), "hell");
    }

    #[test]
    fn pop_empty() {
        let mut s = SmallStr::<16>::new();
        assert_eq!(s.pop(), None);
    }

    #[test]
    fn pop_multibyte() {
        let mut s = SmallStr::<16>::from("hi\u{1F600}");
        assert_eq!(s.pop(), Some('\u{1F600}'));
        assert_eq!(s.as_str(), "hi");
    }

    #[test]
    fn clear() {
        let mut s = SmallStr::<16>::from("hello");
        s.clear();
        assert!(s.is_empty());
    }

    #[test]
    fn display_and_debug() {
        let s = SmallStr::<16>::from("hello");
        assert_eq!(format!("{s}"), "hello");
        assert_eq!(format!("{s:?}"), "\"hello\"");
    }

    #[test]
    fn fmt_write() {
        use core::fmt::Write;
        let mut s = SmallStr::<32>::new();
        write!(s, "x = {}", 42).unwrap();
        assert_eq!(s.as_str(), "x = 42");
    }

    #[test]
    fn deref_to_str() {
        let s = SmallStr::<16>::from("hello");
        assert!(s.starts_with("hel"));
        assert!(s.ends_with("llo"));
    }

    #[test]
    fn eq_comparisons() {
        let s = SmallStr::<16>::from("hello");
        assert_eq!(s, "hello");
        assert_eq!(s, *"hello");
        assert_eq!(s, String::from("hello"));

        let s2 = SmallStr::<16>::from("hello");
        assert_eq!(s, s2);
    }

    #[test]
    fn ord() {
        let a = SmallStr::<16>::from("abc");
        let b = SmallStr::<16>::from("abd");
        assert!(a < b);
    }

    #[test]
    fn from_str() {
        let s: SmallStr<16> = "hello".parse().unwrap();
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn from_string() {
        let s = SmallStr::<16>::from(String::from("hello"));
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn add_and_add_assign() {
        let s = SmallStr::<16>::from("hello");
        let s2 = s + " world";
        assert_eq!(s2.as_str(), "hello world");

        let mut s3 = SmallStr::<16>::from("hi");
        s3 += "!";
        assert_eq!(s3.as_str(), "hi!");
    }

    #[test]
    fn extend_chars() {
        let mut s = SmallStr::<16>::new();
        s.extend(['h', 'i']);
        assert_eq!(s.as_str(), "hi");
    }

    #[test]
    fn extend_strs() {
        let mut s = SmallStr::<16>::new();
        s.extend(["hello", " ", "world"].iter().copied());
        assert_eq!(s.as_str(), "hello world");
    }

    #[test]
    fn into_string() {
        let s = SmallStr::<16>::from("hello");
        let string = s.into_string();
        assert_eq!(string, "hello");
    }

    #[test]
    fn into_bytes() {
        let s = SmallStr::<16>::from("hi");
        let bytes = s.into_bytes();
        assert_eq!(bytes.as_slice(), b"hi");
    }

    #[test]
    fn as_bytes() {
        let s = SmallStr::<16>::from("hi");
        assert_eq!(s.as_bytes(), b"hi");
    }

    #[test]
    fn truncate() {
        let mut s = SmallStr::<16>::from("hello");
        s.truncate(3);
        assert_eq!(s.as_str(), "hel");
    }

    #[test]
    #[should_panic(expected = "not a char boundary")]
    fn truncate_on_non_boundary() {
        let mut s = SmallStr::<16>::from("héllo");
        s.truncate(2); // 'é' is 2 bytes, index 2 is mid-char
    }

    #[test]
    fn truncate_noop() {
        let mut s = SmallStr::<16>::from("hi");
        s.truncate(10);
        assert_eq!(s.as_str(), "hi");
    }

    #[test]
    fn insert_str() {
        let mut s = SmallStr::<16>::from("hllo");
        s.insert_str(1, "e");
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn insert_str_at_start() {
        let mut s = SmallStr::<16>::from("world");
        s.insert_str(0, "hello ");
        assert_eq!(s.as_str(), "hello world");
    }

    #[test]
    fn insert_str_at_end() {
        let mut s = SmallStr::<16>::from("hello");
        s.insert_str(5, " world");
        assert_eq!(s.as_str(), "hello world");
    }

    #[test]
    fn insert_str_empty() {
        let mut s = SmallStr::<16>::from("hello");
        s.insert_str(2, "");
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn insert_str_multibyte() {
        let mut s = SmallStr::<16>::from("héllo");
        s.insert_str(3, "ß"); // index 3 is right after the two-byte 'é'
        assert_eq!(s.as_str(), "héßllo");
    }

    #[test]
    fn insert_str_spills() {
        let mut s = SmallStr::<4>::from("abcd");
        s.insert_str(2, "1234");
        assert_eq!(s.as_str(), "ab1234cd");
        assert!(!s.is_inline());
    }

    #[test]
    #[should_panic(expected = "not a char boundary")]
    fn insert_str_on_non_boundary() {
        let mut s = SmallStr::<16>::from("héllo");
        s.insert_str(2, "x"); // index 2 is in the middle of 'é'
    }

    #[test]
    #[should_panic(expected = "not a char boundary")]
    fn insert_str_out_of_bounds() {
        let mut s = SmallStr::<16>::from("hi");
        s.insert_str(3, "x"); // one past the end
    }

    #[test]
    fn from_small_vec_valid() {
        let mut v = SmallVec::<u8, 16>::new();
        for &b in b"hello" {
            v.push(b);
        }
        let s = SmallStr::from_small_vec(v).unwrap();
        assert_eq!(s.as_str(), "hello");
    }

    #[test]
    fn from_small_vec_invalid() {
        let mut v = SmallVec::<u8, 16>::new();
        v.push(0xFF);
        v.push(0xFE);
        assert!(SmallStr::from_small_vec(v).is_err());
    }

    #[test]
    fn reserve_and_shrink() {
        let mut s = SmallStr::<16>::from("hi");
        s.reserve(100);
        assert!(!s.is_inline());
        s.shrink_to_fit();
        assert!(s.is_inline());
        assert_eq!(s.as_str(), "hi");
    }

    #[test]
    fn spill() {
        let mut s = SmallStr::<16>::from("hi");
        assert!(s.is_inline());
        s.spill();
        assert!(!s.is_inline());
        assert_eq!(s.as_str(), "hi");
    }

    #[test]
    fn clone() {
        let s = SmallStr::<16>::from("hello");
        let s2 = s.clone();
        assert_eq!(s.as_str(), s2.as_str());
    }

    #[test]
    fn as_ref_str() {
        let s = SmallStr::<16>::from("hello");
        let r: &str = s.as_ref();
        assert_eq!(r, "hello");
    }

    #[test]
    fn as_ref_bytes() {
        let s = SmallStr::<16>::from("hi");
        let r: &[u8] = s.as_ref();
        assert_eq!(r, b"hi");
    }

    #[test]
    fn borrow_str() {
        use core::borrow::Borrow;
        let s = SmallStr::<16>::from("hello");
        let b: &str = s.borrow();
        assert_eq!(b, "hello");
    }

    #[test]
    fn to_string() {
        let s = SmallStr::<16>::from("hello");
        let string = s.to_string();
        assert_eq!(string, "hello");
    }
}