generic_str/
owned_utf8.rs

1use core::str::Utf8Error;
2use std::mem::MaybeUninit;
3
4use generic_vec::{
5    raw::{AllocResult, Storage, StorageWithCapacity},
6    ArrayVec, GenericVec,
7};
8
9#[cfg(feature = "alloc")]
10use std::alloc::{Allocator, Global};
11
12use crate::{string_base::StringBase, OwnedString};
13
14/// Exactly the same as [`std::string::String`], except generic
15///
16/// ```
17/// # use generic_str::{str, String};
18/// let mut s = String::new();
19/// s.push_str("foobar".into());
20/// assert_eq!(s, <&str>::from("foobar"));
21/// ```
22#[cfg(feature = "alloc")]
23pub type String<A = Global> = OwnedString<u8, Box<[MaybeUninit<u8>], A>>;
24
25/// Same API as [`String`] but without any re-allocation. Can only hold up to `N` bytes
26///
27/// ```
28/// # use generic_str::{str, ArrayString};
29/// let mut s = ArrayString::<8>::new();
30/// assert_eq!(std::mem::size_of_val(&s), 8 + 8); // 8 bytes of storage, 8 bytes for length
31///
32/// s.push_str("foo".into());
33/// let t = s.clone(); // cloning requires no heap allocations
34/// s.push_str("bar".into());
35///
36/// assert_eq!(t, <&str>::from("foo"));
37/// assert_eq!(s, <&str>::from("foobar"));
38/// ```
39pub type ArrayString<const N: usize> = OwnedString<u8, [MaybeUninit<u8>; N]>;
40
41#[cfg(feature = "alloc")]
42impl String {
43    /// Creates a new empty `String`.
44    ///
45    /// Given that the `String` is empty, this will not allocate any initial
46    /// buffer. While that means that this initial operation is very
47    /// inexpensive, it may cause excessive allocation later when you add
48    /// data. If you have an idea of how much data the `String` will hold,
49    /// consider the [`with_capacity`] method to prevent excessive
50    /// re-allocation.
51    ///
52    /// [`with_capacity`]: String::with_capacity
53    ///
54    /// # Examples
55    ///
56    /// Basic usage:
57    ///
58    /// ```
59    /// # use generic_str::String;
60    /// let s = String::new();
61    /// ```
62    #[inline]
63    pub fn new() -> Self {
64        Self::with_storage(Box::default())
65    }
66
67    /// Creates a new empty `String` with a particular capacity.
68    ///
69    /// `String`s have an internal buffer to hold their data. The capacity is
70    /// the length of that buffer, and can be queried with the [`capacity`]
71    /// method. This method creates an empty `String`, but one with an initial
72    /// buffer that can hold `capacity` bytes. This is useful when you may be
73    /// appending a bunch of data to the `String`, reducing the number of
74    /// reallocations it needs to do.
75    ///
76    /// [`capacity`]: StringBase::capacity
77    ///
78    /// If the given capacity is `0`, no allocation will occur, and this method
79    /// is identical to the [`new`] method.
80    ///
81    /// [`new`]: StringBase::new
82    ///
83    /// # Examples
84    ///
85    /// Basic usage:
86    ///
87    /// ```
88    /// # use generic_str::String;
89    /// let mut s = String::with_capacity(10);
90    ///
91    /// // The String contains no chars, even though it has capacity for more
92    /// assert_eq!(s.len(), 0);
93    ///
94    /// // These are all done without reallocating...
95    /// let cap = s.capacity();
96    /// for _ in 0..10 {
97    ///     s.push('a');
98    /// }
99    ///
100    /// assert_eq!(s.capacity(), cap);
101    ///
102    /// // ...but this may make the string reallocate
103    /// s.push('a');
104    /// ```
105    #[inline]
106    pub fn with_capacity(capacity: usize) -> Self {
107        Self::new_with_capacity(capacity)
108    }
109}
110
111#[cfg(feature = "alloc")]
112impl<A: Allocator> String<A> {
113    pub fn with_alloc(alloc: A) -> Self {
114        Self::with_storage(Box::new_uninit_slice_in(0, alloc))
115    }
116}
117
118impl<const N: usize> ArrayString<N> {
119    /// Creates a new empty `ArrayString`.
120    ///
121    /// # Examples
122    ///
123    /// Basic usage:
124    ///
125    /// ```
126    /// # use generic_str::ArrayString;
127    /// let s = ArrayString::<8>::new();
128    /// ```
129    #[inline]
130    pub fn new() -> Self {
131        Self {
132            storage: ArrayVec::new(),
133        }
134    }
135}
136
137#[derive(PartialEq, Eq)]
138pub struct FromUtf8Error<S: Storage<Item = u8>> {
139    bytes: GenericVec<u8, S>,
140    error: Utf8Error,
141}
142
143use core::fmt;
144impl<S: Storage<Item = u8>> fmt::Debug for FromUtf8Error<S> {
145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146        f.debug_struct("FromUtf8Error")
147            .field("bytes", &self.bytes)
148            .field("error", &self.error)
149            .finish()
150    }
151}
152
153impl<S: ?Sized + Storage<Item = u8>> OwnedString<u8, S> {
154    /// Converts a vector of bytes to a `String`.
155    ///
156    /// A string ([`String`]) is made of bytes ([`u8`]), and a vector of bytes
157    /// ([`Vec<u8>`]) is made of bytes, so this function converts between the
158    /// two. Not all byte slices are valid `String`s, however: `String`
159    /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
160    /// the bytes are valid UTF-8, and then does the conversion.
161    ///
162    /// If you are sure that the byte slice is valid UTF-8, and you don't want
163    /// to incur the overhead of the validity check, there is an unsafe version
164    /// of this function, [`from_utf8_unchecked`], which has the same behavior
165    /// but skips the check.
166    ///
167    /// This method will take care to not copy the vector, for efficiency's
168    /// sake.
169    ///
170    /// If you need a [`&str`] instead of a `String`, consider
171    /// [`from_utf8`].
172    ///
173    /// [`from_utf8`]: crate::from_utf8
174    ///
175    /// The inverse of this method is [`into_bytes`].
176    ///
177    /// # Errors
178    ///
179    /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
180    /// provided bytes are not UTF-8. The vector you moved in is also included.
181    ///
182    /// # Examples
183    ///
184    /// Basic usage:
185    ///
186    /// ```
187    /// # use generic_str::{str, String};
188    /// // some bytes, in a vector
189    /// let sparkle_heart = vec![240, 159, 146, 150];
190    ///
191    /// // We know these bytes are valid, so we'll use `unwrap()`.
192    /// let sparkle_heart = String::from_utf8(sparkle_heart.into()).unwrap();
193    ///
194    /// assert_eq!(sparkle_heart, <&str>::from("💖"));
195    /// ```
196    ///
197    /// Incorrect bytes:
198    ///
199    /// ```
200    /// # use generic_str::String;
201    /// // some invalid bytes, in a vector
202    /// let sparkle_heart = vec![0, 159, 146, 150];
203    ///
204    /// assert!(String::from_utf8(sparkle_heart.into()).is_err());
205    /// ```
206    ///
207    /// See the docs for [`FromUtf8Error`] for more details on what you can do
208    /// with this error.
209    ///
210    /// [`from_utf8_unchecked`]: StringBase::from_utf8_unchecked
211    /// [`Vec<u8>`]: std::vec::Vec
212    /// [`&str`]: prim@str
213    /// [`into_bytes`]: StringBase::into_bytes
214    #[inline]
215    pub fn from_utf8(vec: GenericVec<S::Item, S>) -> Result<Self, FromUtf8Error<S>>
216    where
217        S: Sized,
218    {
219        match core::str::from_utf8(&vec) {
220            Ok(..) => Ok(Self { storage: vec }),
221            Err(e) => Err(FromUtf8Error {
222                bytes: vec,
223                error: e,
224            }),
225        }
226    }
227    /// Converts a vector of bytes to a `String` without checking that the
228    /// string contains valid UTF-8.
229    ///
230    /// See the safe version, [`from_utf8`], for more details.
231    ///
232    /// [`from_utf8`]: StringBase::from_utf8
233    ///
234    /// # Safety
235    ///
236    /// This function is unsafe because it does not check that the bytes passed
237    /// to it are valid UTF-8. If this constraint is violated, it may cause
238    /// memory unsafety issues with future users of the `String`, as the rest of
239    /// the standard library assumes that `String`s are valid UTF-8.
240    ///
241    /// # Examples
242    ///
243    /// Basic usage:
244    ///
245    /// ```
246    /// # use generic_str::{str, String};
247    /// // some bytes, in a vector
248    /// let sparkle_heart = vec![240, 159, 146, 150];
249    ///
250    /// let sparkle_heart = unsafe {
251    ///     String::from_utf8_unchecked(sparkle_heart.into())
252    /// };
253    ///
254    /// assert_eq!(sparkle_heart, <&str>::from("💖"));
255    /// ```
256    #[inline]
257    pub unsafe fn from_utf8_unchecked(vec: GenericVec<S::Item, S>) -> Self
258    where
259        S: Sized,
260    {
261        Self { storage: vec }
262    }
263    /// Converts a `String` into a byte vector.
264    ///
265    /// This consumes the `String`, so we do not need to copy its contents.
266    ///
267    /// # Examples
268    ///
269    /// Basic usage:
270    ///
271    /// ```
272    /// # use generic_str::String;
273    /// let s = String::from("hello");
274    /// let bytes = s.into_bytes();
275    ///
276    /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]);
277    /// ```
278    #[inline]
279    pub fn into_bytes(self) -> GenericVec<S::Item, S>
280    where
281        S: Sized,
282    {
283        self.storage
284    }
285    /// Extracts a string slice containing the entire `String`.
286    ///
287    /// # Examples
288    ///
289    /// Basic usage:
290    ///
291    /// ```
292    /// # use generic_str::{str, String};
293    /// let s = String::from("foo");
294    ///
295    /// assert_eq!(s.as_str(), <&str>::from("foo"));
296    /// ```
297    #[inline]
298    pub fn as_str(&self) -> &crate::str {
299        self
300    }
301    /// Converts a `String` into a mutable string slice.
302    ///
303    /// # Examples
304    ///
305    /// Basic usage:
306    ///
307    /// ```
308    /// # use generic_str::{str, String};
309    /// let mut s = String::from("foobar");
310    /// let s_mut_str = s.as_mut_str();
311    ///
312    /// s_mut_str.make_ascii_uppercase();
313    ///
314    /// assert_eq!(s_mut_str, <&str>::from("FOOBAR"));
315    /// ```
316    #[inline]
317    pub fn as_mut_str(&mut self) -> &mut crate::str {
318        self
319    }
320    /// Appends a given string slice onto the end of this `String`.
321    ///
322    /// # Examples
323    ///
324    /// Basic usage:
325    ///
326    /// ```
327    /// # use generic_str::{str, String};
328    /// let mut s = String::from("foo");
329    ///
330    /// s.push_str("bar".into());
331    ///
332    /// assert_eq!(s, <&str>::from("foobar"));
333    /// ```
334    #[inline]
335    pub fn push_str(&mut self, string: &crate::str) {
336        self.storage.extend_from_slice(&string.storage)
337    }
338    /// Ensures that this `String`'s capacity is at least `additional` bytes
339    /// larger than its length.
340    ///
341    /// The capacity may be increased by more than `additional` bytes if it
342    /// chooses, to prevent frequent reallocations.
343    ///
344    /// # Panics
345    ///
346    /// Panics if the new capacity overflows [`usize`].
347    ///
348    /// # Examples
349    ///
350    /// Basic usage:
351    ///
352    /// ```
353    /// # use generic_str::String;
354    /// let mut s = String::new();
355    ///
356    /// s.reserve(10);
357    ///
358    /// assert!(s.capacity() >= 10);
359    /// ```
360    ///
361    /// This may not actually increase the capacity:
362    ///
363    /// ```
364    /// # use generic_str::String;
365    /// let mut s = String::with_capacity(10);
366    /// s.push('a');
367    /// s.push('b');
368    ///
369    /// // s now has a length of 2 and a capacity of 10
370    /// assert_eq!(2, s.len());
371    /// assert_eq!(10, s.capacity());
372    ///
373    /// // Since we already have an extra 8 capacity, calling this...
374    /// s.reserve(8);
375    ///
376    /// // ... doesn't actually increase.
377    /// assert_eq!(10, s.capacity());
378    /// ```
379    #[inline]
380    pub fn reserve(&mut self, additional: usize) {
381        self.storage.reserve(additional)
382    }
383    /// Tries to reserve capacity for at least `additional` more elements to be inserted
384    /// in the given `String`. The collection may reserve more space to avoid
385    /// frequent reallocations. After calling `reserve`, capacity will be
386    /// greater than or equal to `self.len() + additional`. Does nothing if
387    /// capacity is already sufficient.
388    ///
389    /// # Errors
390    ///
391    /// If the capacity overflows, or the allocator reports a failure, then an error
392    /// is returned.
393    pub fn try_reserve(&mut self, additional: usize) -> AllocResult {
394        self.storage.try_reserve(additional)
395    }
396    /// Appends the given [`char`] to the end of this `String`.
397    ///
398    /// # Examples
399    ///
400    /// Basic usage:
401    ///
402    /// ```
403    /// # use generic_str::{str, String};
404    /// let mut s = String::from("abc");
405    ///
406    /// s.push('1');
407    /// s.push('2');
408    /// s.push('3');
409    ///
410    /// assert_eq!(s, <&str>::from("abc123"));
411    /// ```
412    #[inline]
413    pub fn push(&mut self, ch: char) {
414        match ch.len_utf8() {
415            1 => {
416                self.storage.push(ch as u8);
417            }
418            _ => self
419                .storage
420                .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
421        }
422    }
423
424    /// Removes the last character from the string buffer and returns it.
425    ///
426    /// Returns [`None`] if this `String` is empty.
427    ///
428    /// # Examples
429    ///
430    /// Basic usage:
431    ///
432    /// ```
433    /// # use generic_str::String;
434    /// let mut s = String::from("foo");
435    ///
436    /// assert_eq!(s.pop(), Some('o'));
437    /// assert_eq!(s.pop(), Some('o'));
438    /// assert_eq!(s.pop(), Some('f'));
439    ///
440    /// assert_eq!(s.pop(), None);
441    /// ```
442    #[inline]
443    pub fn pop(&mut self) -> Option<char> {
444        let ch = self.chars().rev().next()?;
445        let newlen = self.len() - ch.len_utf8();
446        unsafe {
447            self.storage.set_len_unchecked(newlen);
448        }
449        Some(ch)
450    }
451
452    /// Shortens this `String` to the specified length.
453    ///
454    /// If `new_len` is greater than the string's current length, this has no
455    /// effect.
456    ///
457    /// Note that this method has no effect on the allocated capacity
458    /// of the string
459    ///
460    /// # Panics
461    ///
462    /// Panics if `new_len` does not lie on a [`char`] boundary.
463    ///
464    /// # Examples
465    ///
466    /// Basic usage:
467    ///
468    /// ```
469    /// # use generic_str::{str, String};
470    /// let mut s = String::from("hello");
471    ///
472    /// s.truncate(2);
473    ///
474    /// assert_eq!(s, <&str>::from("he"));
475    /// ```
476    #[inline]
477    pub fn truncate(&mut self, new_len: usize) {
478        if new_len <= self.len() {
479            assert!(self.is_char_boundary(new_len));
480            self.storage.truncate(new_len)
481        }
482    }
483
484    /// Removes a [`char`] from this `String` at a byte position and returns it.
485    ///
486    /// This is an *O*(*n*) operation, as it requires copying every element in the
487    /// buffer.
488    ///
489    /// # Panics
490    ///
491    /// Panics if `idx` is larger than or equal to the `String`'s length,
492    /// or if it does not lie on a [`char`] boundary.
493    ///
494    /// # Examples
495    ///
496    /// Basic usage:
497    ///
498    /// ```
499    /// # use generic_str::String;
500    /// let mut s = String::from("foo");
501    ///
502    /// assert_eq!(s.remove(0), 'f');
503    /// assert_eq!(s.remove(1), 'o');
504    /// assert_eq!(s.remove(0), 'o');
505    /// ```
506    #[inline]
507    pub fn remove(&mut self, idx: usize) -> char {
508        let ch = match self[idx..].chars().next() {
509            Some(ch) => ch,
510            None => panic!("cannot remove a char from the end of a string"),
511        };
512
513        let next = idx + ch.len_utf8();
514        let len = self.len();
515        unsafe {
516            core::ptr::copy(
517                self.storage.as_ptr().add(next),
518                self.storage.as_mut_ptr().add(idx),
519                len - next,
520            );
521            self.storage.set_len_unchecked(len - (next - idx));
522        }
523        ch
524    }
525
526    /// Inserts a character into this `String` at a byte position.
527    ///
528    /// This is an *O*(*n*) operation as it requires copying every element in the
529    /// buffer.
530    ///
531    /// # Panics
532    ///
533    /// Panics if `idx` is larger than the `String`'s length, or if it does not
534    /// lie on a [`char`] boundary.
535    ///
536    /// # Examples
537    ///
538    /// Basic usage:
539    ///
540    /// ```
541    /// # use generic_str::{str, String};
542    /// let mut s = String::with_capacity(3);
543    ///
544    /// s.insert(0, 'f');
545    /// s.insert(1, 'o');
546    /// s.insert(2, 'o');
547    ///
548    /// assert_eq!(s, <&str>::from("foo"));
549    /// ```
550    #[inline]
551    pub fn insert(&mut self, idx: usize, ch: char) {
552        assert!(self.is_char_boundary(idx));
553        let mut bits = [0; 4];
554        let bits = ch.encode_utf8(&mut bits).as_bytes();
555
556        unsafe {
557            self.insert_bytes(idx, bits);
558        }
559    }
560
561    unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
562        let len = self.len();
563        let amt = bytes.len();
564        self.storage.reserve(amt);
565
566        core::ptr::copy(
567            self.storage.as_ptr().add(idx),
568            self.storage.as_mut_ptr().add(idx + amt),
569            len - idx,
570        );
571        core::ptr::copy(bytes.as_ptr(), self.storage.as_mut_ptr().add(idx), amt);
572        self.storage.set_len_unchecked(len + amt);
573    }
574
575    /// Inserts a string slice into this `String` at a byte position.
576    ///
577    /// This is an *O*(*n*) operation as it requires copying every element in the
578    /// buffer.
579    ///
580    /// # Panics
581    ///
582    /// Panics if `idx` is larger than the `String`'s length, or if it does not
583    /// lie on a [`char`] boundary.
584    ///
585    /// # Examples
586    ///
587    /// Basic usage:
588    ///
589    /// ```
590    /// # use generic_str::{str, String};
591    /// let mut s = String::from("bar");
592    ///
593    /// s.insert_str(0, "foo");
594    ///
595    /// assert_eq!(s, <&str>::from("foobar"));
596    /// ```
597    #[inline]
598    pub fn insert_str(&mut self, idx: usize, string: &str) {
599        assert!(self.is_char_boundary(idx));
600
601        unsafe {
602            self.insert_bytes(idx, string.as_bytes());
603        }
604    }
605
606    /// Returns a mutable reference to the contents of this `String`.
607    ///
608    /// # Safety
609    ///
610    /// This function is unsafe because it does not check that the bytes passed
611    /// to it are valid UTF-8. If this constraint is violated, it may cause
612    /// memory unsafety issues with future users of the `String`, as the rest of
613    /// the standard library assumes that `String`s are valid UTF-8.
614    ///
615    /// # Examples
616    ///
617    /// Basic usage:
618    ///
619    /// ```
620    /// # use generic_str::{str, String};
621    /// let mut s = String::from("hello");
622    ///
623    /// unsafe {
624    ///     let vec = s.as_mut_vec();
625    ///     assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]);
626    ///
627    ///     vec.reverse();
628    /// }
629    /// assert_eq!(s, <&str>::from("olleh"));
630    /// ```
631    #[inline]
632    pub unsafe fn as_mut_vec(&mut self) -> &mut GenericVec<S::Item, S> {
633        &mut self.storage
634    }
635
636    /// Splits the string into two at the given byte index.
637    ///
638    /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
639    /// the returned `String` contains bytes `[at, len)`. `at` must be on the
640    /// boundary of a UTF-8 code point.
641    ///
642    /// Note that the capacity of `self` does not change.
643    ///
644    /// # Panics
645    ///
646    /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last
647    /// code point of the string.
648    ///
649    /// # Examples
650    ///
651    /// ```
652    /// # use generic_str::{str, String};
653    /// # fn main() {
654    /// let mut hello = String::from("Hello, World!");
655    /// let world: String = hello.split_off(7);
656    /// assert_eq!(hello, <&str>::from("Hello, "));
657    /// assert_eq!(world, <&str>::from("World!"));
658    /// # }
659    /// ```
660    #[inline]
661    #[must_use = "use `.truncate()` if you don't need the other half"]
662    pub fn split_off<B: ?Sized + StorageWithCapacity<Item = u8>>(
663        &mut self,
664        at: usize,
665    ) -> StringBase<GenericVec<S::Item, B>> {
666        assert!(self.is_char_boundary(at));
667        let other = self.storage.split_off(at);
668        unsafe { StringBase::from_utf8_unchecked(other) }
669    }
670
671    /// Truncates this `String`, removing all contents.
672    ///
673    /// While this means the `String` will have a length of zero, it does not
674    /// touch its capacity.
675    ///
676    /// # Examples
677    ///
678    /// Basic usage:
679    ///
680    /// ```
681    /// # use generic_str::String;
682    /// let mut s = String::from("foo");
683    ///
684    /// s.clear();
685    ///
686    /// assert!(s.is_empty());
687    /// assert_eq!(0, s.len());
688    /// assert_eq!(3, s.capacity());
689    /// ```
690    #[inline]
691    pub fn clear(&mut self) {
692        self.storage.clear()
693    }
694
695    /// Returns this `String`'s capacity, in bytes.
696    ///
697    /// # Examples
698    ///
699    /// Basic usage:
700    ///
701    /// ```
702    /// # use generic_str::String;
703    /// let s = String::with_capacity(10);
704    ///
705    /// assert!(s.capacity() >= 10);
706    /// ```
707    #[inline]
708    pub fn capacity(&self) -> usize {
709        self.storage.capacity()
710    }
711}