zigzag_alloc/collections/string.rs
1//! Growable UTF-8 string with an explicit allocator.
2//!
3//! [`ExString`] is a thin wrapper around [`ExVec<u8>`] that maintains the
4//! invariant that the byte buffer is valid UTF-8. It implements
5//! [`fmt::Write`] so it can be used as the target of `write!` / `writeln!`.
6//!
7//! Like all collections in this crate, `ExString` never touches the global
8//! allocator — every allocation is routed through the provided
9//! [`Allocator`] reference.
10
11use core::fmt::{self, Write as FmtWrite};
12
13use crate::alloc::allocator::Allocator;
14use super::ExVec;
15
/// A growable, allocator-backed UTF-8 string.
///
/// # Invariant
///
/// The byte buffer `buf` always contains valid UTF-8. Methods that append
/// `&str` or `char` values uphold this invariant automatically, because both
/// types are guaranteed to be valid UTF-8 by the language. The `as_str` /
/// `as_bytes` accessors expose the buffer read-only, so callers cannot write
/// invalid bytes through them.
///
/// `as_str` relies on this invariant without re-validating the bytes, so any
/// method that rewrites bytes in place (currently `replace_byte`) must
/// preserve it.
///
/// # Lifetime
///
/// The allocator reference `'a` must outlive the `ExString`.
pub struct ExString<'a> {
    /// Underlying byte buffer; always valid UTF-8.
    buf: ExVec<'a, u8>,
}
32
33impl<'a> ExString<'a> {
34 /// Creates a new, empty `ExString` that will allocate through `alloc`.
35 pub fn new(alloc: &'a dyn Allocator) -> Self {
36 Self { buf: ExVec::new(alloc) }
37 }
38
39 /// Creates an `ExString` pre-populated with a copy of `s`.
40 pub fn from_str(s: &str, alloc: &'a dyn Allocator) -> Self {
41 let mut this = Self::new(alloc);
42 this.push_str(s);
43 this
44 }
45
46 /// Appends the string slice `s` to the end of this string.
47 ///
48 /// Grows the underlying buffer if necessary.
49 ///
50 /// # Panics
51 ///
52 /// Panics if the backing allocator cannot satisfy the growth request.
53 pub fn push_str(&mut self, s: &str) {
54 self.buf.push_slice(s.as_bytes());
55 }
56
57 /// Appends the Unicode scalar `ch` (encoded as UTF-8) to this string.
58 ///
59 /// # Panics
60 ///
61 /// Panics if the backing allocator cannot satisfy the growth request.
62 pub fn push(&mut self, ch: char) {
63 let mut tmp = [0u8; 4];
64 self.push_str(ch.encode_utf8(&mut tmp));
65 }
66
67 /// Returns the string content as a `&str`.
68 ///
69 /// # Safety Justification
70 ///
71 /// The buffer is always valid UTF-8 because only `push_str` and `push`
72 /// can append bytes, and both sources (`&str` / `char`) are guaranteed
73 /// to be valid UTF-8 by Rust's type system. `clear` simply sets `len = 0`
74 /// without writing invalid bytes.
75 #[inline]
76 pub fn as_str(&self) -> &str {
77 // SAFETY: `buf` contains valid UTF-8 at all times — see module invariant.
78 unsafe { core::str::from_utf8_unchecked(self.buf.as_slice()) }
79 }
80
81 /// Returns the number of bytes (not Unicode code points) in the string.
82 #[inline] pub fn len(&self) -> usize { self.buf.len() }
83 /// Returns `true` if the string contains no bytes.
84 #[inline] pub fn is_empty(&self) -> bool { self.buf.is_empty() }
85 /// Returns the number of bytes the buffer can hold without reallocating.
86 #[inline] pub fn capacity(&self) -> usize { self.buf.capacity() }
87 /// Returns the raw byte slice of the string contents.
88 #[inline] pub fn as_bytes(&self) -> &[u8] { self.buf.as_slice() }
89
90 /// Clears the string, setting its length to zero.
91 ///
92 /// Does **not** release the backing allocation.
93 pub fn clear(&mut self) {
94 // SAFETY: Setting `len = 0` is safe for `u8` which has no destructor.
95 unsafe { self.buf.set_len(0) };
96 }
97
98 /// Returns the byte offset of the first occurrence of `byte`, or `None`.
99 #[inline]
100 pub fn find_byte(&self, byte: u8) -> Option<usize> {
101 self.buf.find_byte(byte)
102 }
103
104 /// Returns `true` if the string contains the given byte.
105 #[inline]
106 pub fn contains_byte(&self, byte: u8) -> bool {
107 self.find_byte(byte).is_some()
108 }
109
110 /// Returns the number of times `byte` appears in the string.
111 pub fn count_byte(&self, byte: u8) -> usize {
112 let ptr = self.buf.as_ptr();
113 let n = self.buf.len();
114 let mut count = 0usize;
115 let mut i = 0usize;
116 while i < n {
117 match unsafe { crate::simd::find_byte(ptr.add(i), byte, n - i) } {
118 Some(off) => { count += 1; i += off + 1; }
119 None => break,
120 }
121 }
122 count
123 }
124
125 /// Calls `f` with the byte offset of every occurrence of `byte`.
126 pub fn for_each_byte_match<F: FnMut(usize)>(&self, byte: u8, mut f: F) {
127 self.buf.for_each_byte_match(byte, &mut f);
128 }
129
130 /// Replaces every occurrence of `from` with `to` in-place.
131 ///
132 /// # Panics
133 ///
134 /// If `from` and `to` have different UTF-8 lengths, the resulting string
135 /// may no longer be valid UTF-8. This method is designed for single-byte
136 /// replacements only (e.g. replacing `b'\n'` with `b' '`).
137 pub fn replace_byte(&mut self, from: u8, to: u8) {
138 let n = self.buf.len();
139 let ptr = self.buf.as_mut_slice().as_mut_ptr();
140 let mut i = 0usize;
141 while i < n {
142 match unsafe { crate::simd::find_byte(ptr.add(i), from, n - i) } {
143 Some(off) => {
144 // SAFETY: `i + off < n`, so `ptr + i + off` is within the
145 // initialised buffer.
146 unsafe { *ptr.add(i + off) = to };
147 i += off + 1;
148 }
149 None => break,
150 }
151 }
152 }
153}
154
155impl FmtWrite for ExString<'_> {
156 /// Appends `s` to the string. Called by `write!` / `writeln!`.
157 fn write_str(&mut self, s: &str) -> fmt::Result {
158 self.push_str(s);
159 Ok(())
160 }
161}
162
163impl fmt::Display for ExString<'_> {
164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 f.write_str(self.as_str())
166 }
167}
168
169impl fmt::Debug for ExString<'_> {
170 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171 write!(f, "{:?}", self.as_str())
172 }
173}
174
175impl PartialEq<str> for ExString<'_> {
176 fn eq(&self, other: &str) -> bool { self.as_str() == other }
177}
178
179impl PartialEq for ExString<'_> {
180 fn eq(&self, other: &Self) -> bool { self.as_str() == other.as_str() }
181}