edit/arena/
string.rs

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
3
4use std::fmt;
5use std::ops::{Bound, Deref, DerefMut, RangeBounds};
6
7use super::Arena;
8use crate::helpers::*;
9
10/// A custom string type, because `std` lacks allocator support for [`String`].
11///
12/// To keep things simple, this one is hardcoded to [`Arena`].
13#[derive(Clone)]
14pub struct ArenaString<'a> {
15    vec: Vec<u8, &'a Arena>,
16}
17
18impl<'a> ArenaString<'a> {
19    /// Creates a new [`ArenaString`] in the given arena.
20    #[must_use]
21    pub const fn new_in(arena: &'a Arena) -> Self {
22        Self { vec: Vec::new_in(arena) }
23    }
24
25    #[must_use]
26    pub fn with_capacity_in(capacity: usize, arena: &'a Arena) -> Self {
27        Self { vec: Vec::with_capacity_in(capacity, arena) }
28    }
29
30    /// Turns a [`str`] into an [`ArenaString`].
31    #[must_use]
32    pub fn from_str(arena: &'a Arena, s: &str) -> Self {
33        let mut res = Self::new_in(arena);
34        res.push_str(s);
35        res
36    }
37
38    /// It says right here that you checked if `bytes` is valid UTF-8
39    /// and you are sure it is. Presto! Here's an `ArenaString`!
40    ///
41    /// # Safety
42    ///
43    /// You fool! It says "unchecked" right there. Now the house is burning.
44    #[inline]
45    #[must_use]
46    pub unsafe fn from_utf8_unchecked(bytes: Vec<u8, &'a Arena>) -> Self {
47        Self { vec: bytes }
48    }
49
50    /// Checks whether `text` contains only valid UTF-8.
51    /// If the entire string is valid, it returns `Ok(text)`.
52    /// Otherwise, it returns `Err(ArenaString)` with all invalid sequences replaced with U+FFFD.
53    pub fn from_utf8_lossy<'s>(arena: &'a Arena, text: &'s [u8]) -> Result<&'s str, Self> {
54        let mut iter = text.utf8_chunks();
55        let Some(mut chunk) = iter.next() else {
56            return Ok("");
57        };
58
59        let valid = chunk.valid();
60        if chunk.invalid().is_empty() {
61            debug_assert_eq!(valid.len(), text.len());
62            return Ok(unsafe { str::from_utf8_unchecked(text) });
63        }
64
65        const REPLACEMENT: &str = "\u{FFFD}";
66
67        let mut res = Self::new_in(arena);
68        res.reserve(text.len());
69
70        loop {
71            res.push_str(chunk.valid());
72            if !chunk.invalid().is_empty() {
73                res.push_str(REPLACEMENT);
74            }
75            chunk = match iter.next() {
76                Some(chunk) => chunk,
77                None => break,
78            };
79        }
80
81        Err(res)
82    }
83
84    /// Turns a [`Vec<u8>`] into an [`ArenaString`], replacing invalid UTF-8 sequences with U+FFFD.
85    #[must_use]
86    pub fn from_utf8_lossy_owned(v: Vec<u8, &'a Arena>) -> Self {
87        match Self::from_utf8_lossy(v.allocator(), &v) {
88            Ok(..) => unsafe { Self::from_utf8_unchecked(v) },
89            Err(s) => s,
90        }
91    }
92
93    /// It's empty.
94    pub fn is_empty(&self) -> bool {
95        self.vec.is_empty()
96    }
97
98    /// It's lengthy.
99    pub fn len(&self) -> usize {
100        self.vec.len()
101    }
102
103    /// It's capacatity.
104    pub fn capacity(&self) -> usize {
105        self.vec.capacity()
106    }
107
108    /// It's a [`String`], now it's a [`str`]. Wow!
109    pub fn as_str(&self) -> &str {
110        unsafe { str::from_utf8_unchecked(self.vec.as_slice()) }
111    }
112
113    /// It's a [`String`], now it's a [`str`]. And it's mutable! WOW!
114    pub fn as_mut_str(&mut self) -> &mut str {
115        unsafe { str::from_utf8_unchecked_mut(self.vec.as_mut_slice()) }
116    }
117
118    /// Now it's bytes!
119    pub fn as_bytes(&self) -> &[u8] {
120        self.vec.as_slice()
121    }
122
123    /// Returns a mutable reference to the contents of this `String`.
124    ///
125    /// # Safety
126    ///
127    /// The underlying `&mut Vec` allows writing bytes which are not valid UTF-8.
128    pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8, &'a Arena> {
129        &mut self.vec
130    }
131
132    /// Reserves *additional* memory. For you old folks out there (totally not me),
133    /// this is different from C++'s `reserve` which reserves a total size.
134    pub fn reserve(&mut self, additional: usize) {
135        self.vec.reserve(additional)
136    }
137
138    /// Just like [`ArenaString::reserve`], but it doesn't overallocate.
139    pub fn reserve_exact(&mut self, additional: usize) {
140        self.vec.reserve_exact(additional)
141    }
142
143    /// Now it's small! Alarming!
144    ///
145    /// *Do not* call this unless this string is the last thing on the arena.
146    /// Arenas are stacks, they can't deallocate what's in the middle.
147    pub fn shrink_to_fit(&mut self) {
148        self.vec.shrink_to_fit()
149    }
150
151    /// To no surprise, this clears the string.
152    pub fn clear(&mut self) {
153        self.vec.clear()
154    }
155
156    /// Append some text.
157    pub fn push_str(&mut self, string: &str) {
158        self.vec.extend_from_slice(string.as_bytes())
159    }
160
161    /// Append a single character.
162    #[inline]
163    pub fn push(&mut self, ch: char) {
164        match ch.len_utf8() {
165            1 => self.vec.push(ch as u8),
166            _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
167        }
168    }
169
170    /// Same as `push(char)` but with a specified number of character copies.
171    /// Shockingly absent from the standard library.
172    pub fn push_repeat(&mut self, ch: char, total_copies: usize) {
173        if total_copies == 0 {
174            return;
175        }
176
177        let buf = unsafe { self.as_mut_vec() };
178
179        if ch.is_ascii() {
180            // Compiles down to `memset()`.
181            buf.extend(std::iter::repeat_n(ch as u8, total_copies));
182        } else {
183            // Implements efficient string padding using quadratic duplication.
184            let mut utf8_buf = [0; 4];
185            let utf8 = ch.encode_utf8(&mut utf8_buf).as_bytes();
186            let initial_len = buf.len();
187            let added_len = utf8.len() * total_copies;
188            let final_len = initial_len + added_len;
189
190            buf.reserve(added_len);
191            buf.extend_from_slice(utf8);
192
193            while buf.len() != final_len {
194                let end = (final_len - buf.len() + initial_len).min(buf.len());
195                buf.extend_from_within(initial_len..end);
196            }
197        }
198    }
199
200    /// Replaces a range of characters with a new string.
201    pub fn replace_range<R: RangeBounds<usize>>(&mut self, range: R, replace_with: &str) {
202        match range.start_bound() {
203            Bound::Included(&n) => assert!(self.is_char_boundary(n)),
204            Bound::Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
205            Bound::Unbounded => {}
206        };
207        match range.end_bound() {
208            Bound::Included(&n) => assert!(self.is_char_boundary(n + 1)),
209            Bound::Excluded(&n) => assert!(self.is_char_boundary(n)),
210            Bound::Unbounded => {}
211        };
212        unsafe { self.as_mut_vec() }.replace_range(range, replace_with.as_bytes());
213    }
214
215    /// Finds `old` in the string and replaces it with `new`.
216    /// Only performs one replacement.
217    pub fn replace_once_in_place(&mut self, old: &str, new: &str) {
218        if let Some(beg) = self.find(old) {
219            unsafe { self.as_mut_vec() }.replace_range(beg..beg + old.len(), new.as_bytes());
220        }
221    }
222}
223
224impl fmt::Debug for ArenaString<'_> {
225    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
226        fmt::Debug::fmt(&**self, f)
227    }
228}
229
230impl PartialEq<&str> for ArenaString<'_> {
231    fn eq(&self, other: &&str) -> bool {
232        self.as_str() == *other
233    }
234}
235
236impl Deref for ArenaString<'_> {
237    type Target = str;
238
239    fn deref(&self) -> &Self::Target {
240        self.as_str()
241    }
242}
243
244impl DerefMut for ArenaString<'_> {
245    fn deref_mut(&mut self) -> &mut Self::Target {
246        self.as_mut_str()
247    }
248}
249
250impl fmt::Display for ArenaString<'_> {
251    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252        f.write_str(self.as_str())
253    }
254}
255
256impl fmt::Write for ArenaString<'_> {
257    #[inline]
258    fn write_str(&mut self, s: &str) -> fmt::Result {
259        self.push_str(s);
260        Ok(())
261    }
262
263    #[inline]
264    fn write_char(&mut self, c: char) -> fmt::Result {
265        self.push(c);
266        Ok(())
267    }
268}
269
/// Like `format!`, but the result is an `ArenaString` allocated in the arena
/// given as the first argument.
///
/// Panics (via `unwrap`) if formatting reports an error.
#[macro_export]
macro_rules! arena_format {
    ($arena:expr, $($arg:tt)*) => {{
        let mut buffer = $crate::arena::ArenaString::new_in($arena);
        // Call the trait method by path so no `use` is needed at the call site.
        ::std::fmt::Write::write_fmt(&mut buffer, format_args!($($arg)*)).unwrap();
        buffer
    }}
}