generic_str/owned_utf8.rs
1use core::str::Utf8Error;
2use std::mem::MaybeUninit;
3
4use generic_vec::{
5 raw::{AllocResult, Storage, StorageWithCapacity},
6 ArrayVec, GenericVec,
7};
8
9#[cfg(feature = "alloc")]
10use std::alloc::{Allocator, Global};
11
12use crate::{string_base::StringBase, OwnedString};
13
/// Exactly the same as [`std::string::String`], except generic over the
/// allocator `A` that owns its boxed byte buffer (`Global` by default).
///
/// ```
/// # use generic_str::{str, String};
/// let mut s = String::new();
/// s.push_str("foobar".into());
/// assert_eq!(s, <&str>::from("foobar"));
/// ```
#[cfg(feature = "alloc")]
pub type String<A = Global> = OwnedString<u8, Box<[MaybeUninit<u8>], A>>;
24
/// Same API as [`String`], but backed by an inline array of `N` bytes, so it
/// never re-allocates and can only hold up to `N` bytes.
///
/// ```
/// # use generic_str::{str, ArrayString};
/// let mut s = ArrayString::<8>::new();
/// assert_eq!(std::mem::size_of_val(&s), 8 + 8); // 8 bytes of storage, 8 bytes for length
///
/// s.push_str("foo".into());
/// let t = s.clone(); // cloning requires no heap allocations
/// s.push_str("bar".into());
///
/// assert_eq!(t, <&str>::from("foo"));
/// assert_eq!(s, <&str>::from("foobar"));
/// ```
pub type ArrayString<const N: usize> = OwnedString<u8, [MaybeUninit<u8>; N]>;
40
#[cfg(feature = "alloc")]
impl String {
    /// Builds an empty `String`.
    ///
    /// The string starts out on top of a default (empty) boxed buffer, so
    /// constructing it is very cheap. The flip side is that data added later
    /// may trigger repeated re-allocation; when the eventual size is roughly
    /// known up front, prefer [`with_capacity`].
    ///
    /// [`with_capacity`]: String::with_capacity
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// # use generic_str::String;
    /// let s = String::new();
    /// ```
    #[inline]
    pub fn new() -> Self {
        let empty_buffer = Box::default();
        Self::with_storage(empty_buffer)
    }

    /// Builds an empty `String` whose buffer can already hold `capacity`
    /// bytes.
    ///
    /// A `String` keeps its data in an internal buffer; the length of that
    /// buffer is the capacity, which can be inspected through [`capacity`].
    /// Pre-sizing the buffer is worthwhile when a lot of data is about to be
    /// appended, because it cuts down on the number of reallocations.
    ///
    /// [`capacity`]: StringBase::capacity
    ///
    /// If the given capacity is `0`, no allocation will occur, and this method
    /// is identical to the [`new`] method.
    ///
    /// [`new`]: StringBase::new
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// # use generic_str::String;
    /// let mut s = String::with_capacity(10);
    ///
    /// // The String contains no chars, even though it has capacity for more
    /// assert_eq!(s.len(), 0);
    ///
    /// // These are all done without reallocating...
    /// let cap = s.capacity();
    /// for _ in 0..10 {
    ///     s.push('a');
    /// }
    ///
    /// assert_eq!(s.capacity(), cap);
    ///
    /// // ...but this may make the string reallocate
    /// s.push('a');
    /// ```
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        Self::new_with_capacity(capacity)
    }
}
110
#[cfg(feature = "alloc")]
impl<A: Allocator> String<A> {
    /// Creates a new empty `String` whose buffer is owned by the allocator
    /// `alloc`.
    ///
    /// The string starts with a zero-length uninitialized buffer obtained
    /// from `alloc`, mirroring what [`String::new`] does for the default
    /// allocator.
    #[inline]
    pub fn with_alloc(alloc: A) -> Self {
        Self::with_storage(Box::new_uninit_slice_in(0, alloc))
    }
}
117
118impl<const N: usize> ArrayString<N> {
119 /// Creates a new empty `ArrayString`.
120 ///
121 /// # Examples
122 ///
123 /// Basic usage:
124 ///
125 /// ```
126 /// # use generic_str::ArrayString;
127 /// let s = ArrayString::<8>::new();
128 /// ```
129 #[inline]
130 pub fn new() -> Self {
131 Self {
132 storage: ArrayVec::new(),
133 }
134 }
135}
136
/// Error produced by `OwnedString::from_utf8` when the supplied bytes are not
/// valid UTF-8.
///
/// It keeps the byte vector that was passed in, so no data is lost on
/// failure, alongside the [`Utf8Error`] reported by the validity check.
#[derive(PartialEq, Eq)]
pub struct FromUtf8Error<S: Storage<Item = u8>> {
    // The original, unmodified byte vector that failed validation.
    bytes: GenericVec<u8, S>,
    // The error returned by `core::str::from_utf8` for `bytes`.
    error: Utf8Error,
}
142
143use core::fmt;
144impl<S: Storage<Item = u8>> fmt::Debug for FromUtf8Error<S> {
145 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146 f.debug_struct("FromUtf8Error")
147 .field("bytes", &self.bytes)
148 .field("error", &self.error)
149 .finish()
150 }
151}
152
153impl<S: ?Sized + Storage<Item = u8>> OwnedString<u8, S> {
154 /// Converts a vector of bytes to a `String`.
155 ///
156 /// A string ([`String`]) is made of bytes ([`u8`]), and a vector of bytes
157 /// ([`Vec<u8>`]) is made of bytes, so this function converts between the
158 /// two. Not all byte slices are valid `String`s, however: `String`
159 /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that
160 /// the bytes are valid UTF-8, and then does the conversion.
161 ///
162 /// If you are sure that the byte slice is valid UTF-8, and you don't want
163 /// to incur the overhead of the validity check, there is an unsafe version
164 /// of this function, [`from_utf8_unchecked`], which has the same behavior
165 /// but skips the check.
166 ///
167 /// This method will take care to not copy the vector, for efficiency's
168 /// sake.
169 ///
170 /// If you need a [`&str`] instead of a `String`, consider
171 /// [`from_utf8`].
172 ///
173 /// [`from_utf8`]: crate::from_utf8
174 ///
175 /// The inverse of this method is [`into_bytes`].
176 ///
177 /// # Errors
178 ///
179 /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
180 /// provided bytes are not UTF-8. The vector you moved in is also included.
181 ///
182 /// # Examples
183 ///
184 /// Basic usage:
185 ///
186 /// ```
187 /// # use generic_str::{str, String};
188 /// // some bytes, in a vector
189 /// let sparkle_heart = vec![240, 159, 146, 150];
190 ///
191 /// // We know these bytes are valid, so we'll use `unwrap()`.
192 /// let sparkle_heart = String::from_utf8(sparkle_heart.into()).unwrap();
193 ///
194 /// assert_eq!(sparkle_heart, <&str>::from("💖"));
195 /// ```
196 ///
197 /// Incorrect bytes:
198 ///
199 /// ```
200 /// # use generic_str::String;
201 /// // some invalid bytes, in a vector
202 /// let sparkle_heart = vec![0, 159, 146, 150];
203 ///
204 /// assert!(String::from_utf8(sparkle_heart.into()).is_err());
205 /// ```
206 ///
207 /// See the docs for [`FromUtf8Error`] for more details on what you can do
208 /// with this error.
209 ///
210 /// [`from_utf8_unchecked`]: StringBase::from_utf8_unchecked
211 /// [`Vec<u8>`]: std::vec::Vec
212 /// [`&str`]: prim@str
213 /// [`into_bytes`]: StringBase::into_bytes
214 #[inline]
215 pub fn from_utf8(vec: GenericVec<S::Item, S>) -> Result<Self, FromUtf8Error<S>>
216 where
217 S: Sized,
218 {
219 match core::str::from_utf8(&vec) {
220 Ok(..) => Ok(Self { storage: vec }),
221 Err(e) => Err(FromUtf8Error {
222 bytes: vec,
223 error: e,
224 }),
225 }
226 }
227 /// Converts a vector of bytes to a `String` without checking that the
228 /// string contains valid UTF-8.
229 ///
230 /// See the safe version, [`from_utf8`], for more details.
231 ///
232 /// [`from_utf8`]: StringBase::from_utf8
233 ///
234 /// # Safety
235 ///
236 /// This function is unsafe because it does not check that the bytes passed
237 /// to it are valid UTF-8. If this constraint is violated, it may cause
238 /// memory unsafety issues with future users of the `String`, as the rest of
239 /// the standard library assumes that `String`s are valid UTF-8.
240 ///
241 /// # Examples
242 ///
243 /// Basic usage:
244 ///
245 /// ```
246 /// # use generic_str::{str, String};
247 /// // some bytes, in a vector
248 /// let sparkle_heart = vec![240, 159, 146, 150];
249 ///
250 /// let sparkle_heart = unsafe {
251 /// String::from_utf8_unchecked(sparkle_heart.into())
252 /// };
253 ///
254 /// assert_eq!(sparkle_heart, <&str>::from("💖"));
255 /// ```
256 #[inline]
257 pub unsafe fn from_utf8_unchecked(vec: GenericVec<S::Item, S>) -> Self
258 where
259 S: Sized,
260 {
261 Self { storage: vec }
262 }
263 /// Converts a `String` into a byte vector.
264 ///
265 /// This consumes the `String`, so we do not need to copy its contents.
266 ///
267 /// # Examples
268 ///
269 /// Basic usage:
270 ///
271 /// ```
272 /// # use generic_str::String;
273 /// let s = String::from("hello");
274 /// let bytes = s.into_bytes();
275 ///
276 /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]);
277 /// ```
278 #[inline]
279 pub fn into_bytes(self) -> GenericVec<S::Item, S>
280 where
281 S: Sized,
282 {
283 self.storage
284 }
285 /// Extracts a string slice containing the entire `String`.
286 ///
287 /// # Examples
288 ///
289 /// Basic usage:
290 ///
291 /// ```
292 /// # use generic_str::{str, String};
293 /// let s = String::from("foo");
294 ///
295 /// assert_eq!(s.as_str(), <&str>::from("foo"));
296 /// ```
297 #[inline]
298 pub fn as_str(&self) -> &crate::str {
299 self
300 }
301 /// Converts a `String` into a mutable string slice.
302 ///
303 /// # Examples
304 ///
305 /// Basic usage:
306 ///
307 /// ```
308 /// # use generic_str::{str, String};
309 /// let mut s = String::from("foobar");
310 /// let s_mut_str = s.as_mut_str();
311 ///
312 /// s_mut_str.make_ascii_uppercase();
313 ///
314 /// assert_eq!(s_mut_str, <&str>::from("FOOBAR"));
315 /// ```
316 #[inline]
317 pub fn as_mut_str(&mut self) -> &mut crate::str {
318 self
319 }
320 /// Appends a given string slice onto the end of this `String`.
321 ///
322 /// # Examples
323 ///
324 /// Basic usage:
325 ///
326 /// ```
327 /// # use generic_str::{str, String};
328 /// let mut s = String::from("foo");
329 ///
330 /// s.push_str("bar".into());
331 ///
332 /// assert_eq!(s, <&str>::from("foobar"));
333 /// ```
334 #[inline]
335 pub fn push_str(&mut self, string: &crate::str) {
336 self.storage.extend_from_slice(&string.storage)
337 }
338 /// Ensures that this `String`'s capacity is at least `additional` bytes
339 /// larger than its length.
340 ///
341 /// The capacity may be increased by more than `additional` bytes if it
342 /// chooses, to prevent frequent reallocations.
343 ///
344 /// # Panics
345 ///
346 /// Panics if the new capacity overflows [`usize`].
347 ///
348 /// # Examples
349 ///
350 /// Basic usage:
351 ///
352 /// ```
353 /// # use generic_str::String;
354 /// let mut s = String::new();
355 ///
356 /// s.reserve(10);
357 ///
358 /// assert!(s.capacity() >= 10);
359 /// ```
360 ///
361 /// This may not actually increase the capacity:
362 ///
363 /// ```
364 /// # use generic_str::String;
365 /// let mut s = String::with_capacity(10);
366 /// s.push('a');
367 /// s.push('b');
368 ///
369 /// // s now has a length of 2 and a capacity of 10
370 /// assert_eq!(2, s.len());
371 /// assert_eq!(10, s.capacity());
372 ///
373 /// // Since we already have an extra 8 capacity, calling this...
374 /// s.reserve(8);
375 ///
376 /// // ... doesn't actually increase.
377 /// assert_eq!(10, s.capacity());
378 /// ```
379 #[inline]
380 pub fn reserve(&mut self, additional: usize) {
381 self.storage.reserve(additional)
382 }
383 /// Tries to reserve capacity for at least `additional` more elements to be inserted
384 /// in the given `String`. The collection may reserve more space to avoid
385 /// frequent reallocations. After calling `reserve`, capacity will be
386 /// greater than or equal to `self.len() + additional`. Does nothing if
387 /// capacity is already sufficient.
388 ///
389 /// # Errors
390 ///
391 /// If the capacity overflows, or the allocator reports a failure, then an error
392 /// is returned.
393 pub fn try_reserve(&mut self, additional: usize) -> AllocResult {
394 self.storage.try_reserve(additional)
395 }
396 /// Appends the given [`char`] to the end of this `String`.
397 ///
398 /// # Examples
399 ///
400 /// Basic usage:
401 ///
402 /// ```
403 /// # use generic_str::{str, String};
404 /// let mut s = String::from("abc");
405 ///
406 /// s.push('1');
407 /// s.push('2');
408 /// s.push('3');
409 ///
410 /// assert_eq!(s, <&str>::from("abc123"));
411 /// ```
412 #[inline]
413 pub fn push(&mut self, ch: char) {
414 match ch.len_utf8() {
415 1 => {
416 self.storage.push(ch as u8);
417 }
418 _ => self
419 .storage
420 .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
421 }
422 }
423
424 /// Removes the last character from the string buffer and returns it.
425 ///
426 /// Returns [`None`] if this `String` is empty.
427 ///
428 /// # Examples
429 ///
430 /// Basic usage:
431 ///
432 /// ```
433 /// # use generic_str::String;
434 /// let mut s = String::from("foo");
435 ///
436 /// assert_eq!(s.pop(), Some('o'));
437 /// assert_eq!(s.pop(), Some('o'));
438 /// assert_eq!(s.pop(), Some('f'));
439 ///
440 /// assert_eq!(s.pop(), None);
441 /// ```
442 #[inline]
443 pub fn pop(&mut self) -> Option<char> {
444 let ch = self.chars().rev().next()?;
445 let newlen = self.len() - ch.len_utf8();
446 unsafe {
447 self.storage.set_len_unchecked(newlen);
448 }
449 Some(ch)
450 }
451
452 /// Shortens this `String` to the specified length.
453 ///
454 /// If `new_len` is greater than the string's current length, this has no
455 /// effect.
456 ///
457 /// Note that this method has no effect on the allocated capacity
458 /// of the string
459 ///
460 /// # Panics
461 ///
462 /// Panics if `new_len` does not lie on a [`char`] boundary.
463 ///
464 /// # Examples
465 ///
466 /// Basic usage:
467 ///
468 /// ```
469 /// # use generic_str::{str, String};
470 /// let mut s = String::from("hello");
471 ///
472 /// s.truncate(2);
473 ///
474 /// assert_eq!(s, <&str>::from("he"));
475 /// ```
476 #[inline]
477 pub fn truncate(&mut self, new_len: usize) {
478 if new_len <= self.len() {
479 assert!(self.is_char_boundary(new_len));
480 self.storage.truncate(new_len)
481 }
482 }
483
484 /// Removes a [`char`] from this `String` at a byte position and returns it.
485 ///
486 /// This is an *O*(*n*) operation, as it requires copying every element in the
487 /// buffer.
488 ///
489 /// # Panics
490 ///
491 /// Panics if `idx` is larger than or equal to the `String`'s length,
492 /// or if it does not lie on a [`char`] boundary.
493 ///
494 /// # Examples
495 ///
496 /// Basic usage:
497 ///
498 /// ```
499 /// # use generic_str::String;
500 /// let mut s = String::from("foo");
501 ///
502 /// assert_eq!(s.remove(0), 'f');
503 /// assert_eq!(s.remove(1), 'o');
504 /// assert_eq!(s.remove(0), 'o');
505 /// ```
506 #[inline]
507 pub fn remove(&mut self, idx: usize) -> char {
508 let ch = match self[idx..].chars().next() {
509 Some(ch) => ch,
510 None => panic!("cannot remove a char from the end of a string"),
511 };
512
513 let next = idx + ch.len_utf8();
514 let len = self.len();
515 unsafe {
516 core::ptr::copy(
517 self.storage.as_ptr().add(next),
518 self.storage.as_mut_ptr().add(idx),
519 len - next,
520 );
521 self.storage.set_len_unchecked(len - (next - idx));
522 }
523 ch
524 }
525
526 /// Inserts a character into this `String` at a byte position.
527 ///
528 /// This is an *O*(*n*) operation as it requires copying every element in the
529 /// buffer.
530 ///
531 /// # Panics
532 ///
533 /// Panics if `idx` is larger than the `String`'s length, or if it does not
534 /// lie on a [`char`] boundary.
535 ///
536 /// # Examples
537 ///
538 /// Basic usage:
539 ///
540 /// ```
541 /// # use generic_str::{str, String};
542 /// let mut s = String::with_capacity(3);
543 ///
544 /// s.insert(0, 'f');
545 /// s.insert(1, 'o');
546 /// s.insert(2, 'o');
547 ///
548 /// assert_eq!(s, <&str>::from("foo"));
549 /// ```
550 #[inline]
551 pub fn insert(&mut self, idx: usize, ch: char) {
552 assert!(self.is_char_boundary(idx));
553 let mut bits = [0; 4];
554 let bits = ch.encode_utf8(&mut bits).as_bytes();
555
556 unsafe {
557 self.insert_bytes(idx, bits);
558 }
559 }
560
561 unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
562 let len = self.len();
563 let amt = bytes.len();
564 self.storage.reserve(amt);
565
566 core::ptr::copy(
567 self.storage.as_ptr().add(idx),
568 self.storage.as_mut_ptr().add(idx + amt),
569 len - idx,
570 );
571 core::ptr::copy(bytes.as_ptr(), self.storage.as_mut_ptr().add(idx), amt);
572 self.storage.set_len_unchecked(len + amt);
573 }
574
575 /// Inserts a string slice into this `String` at a byte position.
576 ///
577 /// This is an *O*(*n*) operation as it requires copying every element in the
578 /// buffer.
579 ///
580 /// # Panics
581 ///
582 /// Panics if `idx` is larger than the `String`'s length, or if it does not
583 /// lie on a [`char`] boundary.
584 ///
585 /// # Examples
586 ///
587 /// Basic usage:
588 ///
589 /// ```
590 /// # use generic_str::{str, String};
591 /// let mut s = String::from("bar");
592 ///
593 /// s.insert_str(0, "foo");
594 ///
595 /// assert_eq!(s, <&str>::from("foobar"));
596 /// ```
597 #[inline]
598 pub fn insert_str(&mut self, idx: usize, string: &str) {
599 assert!(self.is_char_boundary(idx));
600
601 unsafe {
602 self.insert_bytes(idx, string.as_bytes());
603 }
604 }
605
606 /// Returns a mutable reference to the contents of this `String`.
607 ///
608 /// # Safety
609 ///
610 /// This function is unsafe because it does not check that the bytes passed
611 /// to it are valid UTF-8. If this constraint is violated, it may cause
612 /// memory unsafety issues with future users of the `String`, as the rest of
613 /// the standard library assumes that `String`s are valid UTF-8.
614 ///
615 /// # Examples
616 ///
617 /// Basic usage:
618 ///
619 /// ```
620 /// # use generic_str::{str, String};
621 /// let mut s = String::from("hello");
622 ///
623 /// unsafe {
624 /// let vec = s.as_mut_vec();
625 /// assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]);
626 ///
627 /// vec.reverse();
628 /// }
629 /// assert_eq!(s, <&str>::from("olleh"));
630 /// ```
631 #[inline]
632 pub unsafe fn as_mut_vec(&mut self) -> &mut GenericVec<S::Item, S> {
633 &mut self.storage
634 }
635
636 /// Splits the string into two at the given byte index.
637 ///
638 /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
639 /// the returned `String` contains bytes `[at, len)`. `at` must be on the
640 /// boundary of a UTF-8 code point.
641 ///
642 /// Note that the capacity of `self` does not change.
643 ///
644 /// # Panics
645 ///
646 /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last
647 /// code point of the string.
648 ///
649 /// # Examples
650 ///
651 /// ```
652 /// # use generic_str::{str, String};
653 /// # fn main() {
654 /// let mut hello = String::from("Hello, World!");
655 /// let world: String = hello.split_off(7);
656 /// assert_eq!(hello, <&str>::from("Hello, "));
657 /// assert_eq!(world, <&str>::from("World!"));
658 /// # }
659 /// ```
660 #[inline]
661 #[must_use = "use `.truncate()` if you don't need the other half"]
662 pub fn split_off<B: ?Sized + StorageWithCapacity<Item = u8>>(
663 &mut self,
664 at: usize,
665 ) -> StringBase<GenericVec<S::Item, B>> {
666 assert!(self.is_char_boundary(at));
667 let other = self.storage.split_off(at);
668 unsafe { StringBase::from_utf8_unchecked(other) }
669 }
670
671 /// Truncates this `String`, removing all contents.
672 ///
673 /// While this means the `String` will have a length of zero, it does not
674 /// touch its capacity.
675 ///
676 /// # Examples
677 ///
678 /// Basic usage:
679 ///
680 /// ```
681 /// # use generic_str::String;
682 /// let mut s = String::from("foo");
683 ///
684 /// s.clear();
685 ///
686 /// assert!(s.is_empty());
687 /// assert_eq!(0, s.len());
688 /// assert_eq!(3, s.capacity());
689 /// ```
690 #[inline]
691 pub fn clear(&mut self) {
692 self.storage.clear()
693 }
694
695 /// Returns this `String`'s capacity, in bytes.
696 ///
697 /// # Examples
698 ///
699 /// Basic usage:
700 ///
701 /// ```
702 /// # use generic_str::String;
703 /// let s = String::with_capacity(10);
704 ///
705 /// assert!(s.capacity() >= 10);
706 /// ```
707 #[inline]
708 pub fn capacity(&self) -> usize {
709 self.storage.capacity()
710 }
711}