Skip to main content

zigzag_alloc/collections/
string.rs

1//! Growable UTF-8 string with an explicit allocator.
2//!
3//! [`ExString`] is a thin wrapper around [`ExVec<u8>`] that maintains the
4//! invariant that the byte buffer is valid UTF-8.  It implements
5//! [`fmt::Write`] so it can be used as the target of `write!` / `writeln!`.
6//!
7//! Like all collections in this crate, `ExString` never touches the global
8//! allocator — every allocation is routed through the provided
9//! [`Allocator`] reference.
10
11use core::fmt::{self, Write as FmtWrite};
12
13use crate::alloc::allocator::Allocator;
14use super::ExVec;
15
16/// A growable, allocator-backed UTF-8 string.
17///
18/// # Invariant
19///
20/// The byte buffer `buf` always contains valid UTF-8.  Methods that accept
21/// raw `&str` or `char` values uphold this invariant automatically.  The
22/// `as_str` / `as_bytes` accessors expose the buffer read-only so callers
23/// cannot introduce invalid UTF-8.
24///
25/// # Lifetime
26///
27/// The allocator reference `'a` must outlive the `ExString`.
28pub struct ExString<'a> {
29    /// Underlying byte buffer; always valid UTF-8.
30    buf: ExVec<'a, u8>,
31}
32
33impl<'a> ExString<'a> {
34    /// Creates a new, empty `ExString` that will allocate through `alloc`.
35    pub fn new(alloc: &'a dyn Allocator) -> Self {
36        Self { buf: ExVec::new(alloc) }
37    }
38
39    /// Creates an `ExString` pre-populated with a copy of `s`.
40    pub fn from_str(s: &str, alloc: &'a dyn Allocator) -> Self {
41        let mut this = Self::new(alloc);
42        this.push_str(s);
43        this
44    }
45
46    /// Appends the string slice `s` to the end of this string.
47    ///
48    /// Grows the underlying buffer if necessary.
49    ///
50    /// # Panics
51    ///
52    /// Panics if the backing allocator cannot satisfy the growth request.
53    pub fn push_str(&mut self, s: &str) {
54        self.buf.push_slice(s.as_bytes());
55    }
56
57    /// Appends the Unicode scalar `ch` (encoded as UTF-8) to this string.
58    ///
59    /// # Panics
60    ///
61    /// Panics if the backing allocator cannot satisfy the growth request.
62    pub fn push(&mut self, ch: char) {
63        let mut tmp = [0u8; 4];
64        self.push_str(ch.encode_utf8(&mut tmp));
65    }
66
67    /// Returns the string content as a `&str`.
68    ///
69    /// # Safety Justification
70    ///
71    /// The buffer is always valid UTF-8 because only `push_str` and `push`
72    /// can append bytes, and both sources (`&str` / `char`) are guaranteed
73    /// to be valid UTF-8 by Rust's type system.  `clear` simply sets `len = 0`
74    /// without writing invalid bytes.
75    #[inline]
76    pub fn as_str(&self) -> &str {
77        // SAFETY: `buf` contains valid UTF-8 at all times — see module invariant.
78        unsafe { core::str::from_utf8_unchecked(self.buf.as_slice()) }
79    }
80
81    /// Returns the number of bytes (not Unicode code points) in the string.
82    #[inline] pub fn len(&self)      -> usize { self.buf.len() }
83    /// Returns `true` if the string contains no bytes.
84    #[inline] pub fn is_empty(&self) -> bool  { self.buf.is_empty() }
85    /// Returns the number of bytes the buffer can hold without reallocating.
86    #[inline] pub fn capacity(&self) -> usize { self.buf.capacity() }
87    /// Returns the raw byte slice of the string contents.
88    #[inline] pub fn as_bytes(&self) -> &[u8] { self.buf.as_slice() }
89
90    /// Clears the string, setting its length to zero.
91    ///
92    /// Does **not** release the backing allocation.
93    pub fn clear(&mut self) {
94        // SAFETY: Setting `len = 0` is safe for `u8` which has no destructor.
95        unsafe { self.buf.set_len(0) };
96    }
97
98    /// Returns the byte offset of the first occurrence of `byte`, or `None`.
99    #[inline]
100    pub fn find_byte(&self, byte: u8) -> Option<usize> {
101        self.buf.find_byte(byte)
102    }
103
104    /// Returns `true` if the string contains the given byte.
105    #[inline]
106    pub fn contains_byte(&self, byte: u8) -> bool {
107        self.find_byte(byte).is_some()
108    }
109
110    /// Returns the number of times `byte` appears in the string.
111    pub fn count_byte(&self, byte: u8) -> usize {
112        let ptr = self.buf.as_ptr();
113        let n   = self.buf.len();
114        let mut count = 0usize;
115        let mut i     = 0usize;
116        while i < n {
117            match unsafe { crate::simd::find_byte(ptr.add(i), byte, n - i) } {
118                Some(off) => { count += 1; i += off + 1; }
119                None      => break,
120            }
121        }
122        count
123    }
124
125    /// Calls `f` with the byte offset of every occurrence of `byte`.
126    pub fn for_each_byte_match<F: FnMut(usize)>(&self, byte: u8, mut f: F) {
127        self.buf.for_each_byte_match(byte, &mut f);
128    }
129
130    /// Replaces every occurrence of `from` with `to` in-place.
131    ///
132    /// # Panics
133    ///
134    /// If `from` and `to` have different UTF-8 lengths, the resulting string
135    /// may no longer be valid UTF-8.  This method is designed for single-byte
136    /// replacements only (e.g. replacing `b'\n'` with `b' '`).
137    pub fn replace_byte(&mut self, from: u8, to: u8) {
138        let n   = self.buf.len();
139        let ptr = self.buf.as_mut_slice().as_mut_ptr();
140        let mut i = 0usize;
141        while i < n {
142            match unsafe { crate::simd::find_byte(ptr.add(i), from, n - i) } {
143                Some(off) => {
144                    // SAFETY: `i + off < n`, so `ptr + i + off` is within the
145                    // initialised buffer.
146                    unsafe { *ptr.add(i + off) = to };
147                    i += off + 1;
148                }
149                None => break,
150            }
151        }
152    }
153}
154
155impl FmtWrite for ExString<'_> {
156    /// Appends `s` to the string.  Called by `write!` / `writeln!`.
157    fn write_str(&mut self, s: &str) -> fmt::Result {
158        self.push_str(s);
159        Ok(())
160    }
161}
162
163impl fmt::Display for ExString<'_> {
164    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165        f.write_str(self.as_str())
166    }
167}
168
169impl fmt::Debug for ExString<'_> {
170    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171        write!(f, "{:?}", self.as_str())
172    }
173}
174
175impl PartialEq<str> for ExString<'_> {
176    fn eq(&self, other: &str) -> bool { self.as_str() == other }
177}
178
179impl PartialEq for ExString<'_> {
180    fn eq(&self, other: &Self) -> bool { self.as_str() == other.as_str() }
181}