in_place_string_map/lib.rs
1//! `in_place_string_map` is a library for doing string manipulation in place.
2//!
3//! Normally in Rust, if you wanted to handle escapes, for example, you'd need to either map to a
4//! new String, causing allocations, or do `.remove` and `.insert` calls on a String, which
5//! wouldn't cause reallocations if you never grow the String, but *would* cause slowdowns on large
6//! strings due to the need to backshift elements on every item.
7//!
8//! Here, you can just do
9//!
10//! ```rust
11//! use in_place_string_map::MapInPlace;
12//!
13//! fn decode_percent(s: &mut str) -> &mut str {
14//! let mut m = MapInPlace::new(s);
15//!
16//! while let Some(c) = m.pop() {
17//! match c {
18//! '%' => {
19//! let num = m.pop_chars(2).expect("not enough chars");
20//! let n = u8::from_str_radix(num, 16).expect("invalid hex");
21//! m.push(n as char).expect("no more capacity");
22//! }
23//! _ => {
24//! m.push(c).expect("no more capacity");
25//! }
26//! }
27//! }
28//!
29//! m.into_mapped()
30//! }
31//!
32//! let mut input = String::from("%54r%61ns %52igh%74%73%21");
33//!
34//! assert_eq!(decode_percent(&mut input), "Trans Rights!");
35//! ```
36//!
37//! ## Safety
38//!
39//! This library takes care to ensure that the input string is always left in a valid state.
40//!
//! Since [`core::mem::forget`] is safe, no code can soundly rely on users to call destructors. The
//! contents of the original borrowed string after any operation are generally left unspecified,
//! but they are guaranteed to always be valid UTF-8.
44
45// https://twitter.com/reduct_rs/status/1387153973010829315
46#![cfg_attr(all(not(test)), no_std)]
47#![warn(clippy::all)]
48#![warn(clippy::pedantic)]
49#![warn(clippy::nursery)]
50#![warn(clippy::cargo)]
51#![warn(missing_docs)]
52
/// Reinterprets `input` as a `&str` without validating it in release builds.
///
/// In builds with debug assertions enabled, `input` is additionally checked and the function
/// panics if it is not valid UTF-8.
///
/// # Safety
///
/// Identical to [`core::str::from_utf8_unchecked`]: `input` must be valid UTF-8.
unsafe fn from_utf8_unchecked(input: &[u8]) -> &str {
    debug_assert!(
        core::str::from_utf8(input).is_ok(),
        "{:?} was invalid UTF-8",
        input
    );

    // SAFETY: the caller promises that `input` is valid UTF-8.
    unsafe { core::str::from_utf8_unchecked(input) }
}
64
/// Reinterprets `input` as a `&mut str` without validating it in release builds.
///
/// In builds with debug assertions enabled, `input` is additionally checked and the function
/// panics if it is not valid UTF-8.
///
/// # Safety
///
/// Identical to [`core::str::from_utf8_unchecked_mut`]: `input` must be valid UTF-8.
unsafe fn from_utf8_unchecked_mut(input: &mut [u8]) -> &mut str {
    debug_assert!(
        core::str::from_utf8(input).is_ok(),
        "{:?} was invalid UTF-8",
        input
    );

    // SAFETY: the caller promises that `input` is valid UTF-8.
    unsafe { core::str::from_utf8_unchecked_mut(input) }
}
76
/// A mutable reference to a [`str`] that allows for in-place pushes and pops while maintaining
/// valid UTF-8 at all times.
///
/// Conceptually the borrowed string is split into two buffers:
///
/// * the *mapped* buffer, which starts off empty and grows from the front of the string as you
///   push onto it, and
/// * the *unmapped* buffer, which starts off as the full contents of the `&mut str` given to
///   [`MapInPlace::new`] and shrinks as you pop from its start.
///
/// The size of the mapped buffer plus the size of the unmapped buffer can never be bigger than
/// the original string size; a push that would violate this fails with an error.
///
/// The two sizes are free to add up to *less* than the original size, in which case there is an
/// area in the middle with unspecified contents. That area is still valid UTF-8, to ensure
/// safety.
#[derive(Debug)]
pub struct MapInPlace<'a> {
    /// The bytes of the borrowed string.
    ///
    /// Invariants:
    ///
    /// * This must always be valid UTF-8 when the [`MapInPlace`] is exposed to code
    ///   outside this crate.
    buf: &'a mut [u8],

    /// End (exclusive) of the mapped buffer, as a byte index into `buf`.
    ///
    /// Invariants:
    ///
    /// * `0..mapped_head` must always be in bounds for `buf`
    /// * `0..mapped_head` must always be valid UTF-8 (when exposed to code outside this crate)
    /// * `mapped_head <= unmapped_head`
    mapped_head: usize,

    /// Start (inclusive) of the unmapped buffer, as a byte index into `buf`.
    ///
    /// Invariants:
    ///
    /// * `unmapped_head` must be in bounds
    /// * `unmapped_head..` must always be in bounds for `buf`
    /// * `unmapped_head` must always be on a char boundary, so that `unmapped_head..` is valid
    ///   UTF-8
    unmapped_head: usize,
}
113
/// Checks that `byte` is the first byte in a UTF-8 code point sequence, i.e. that it is *not* a
/// continuation byte.
///
/// Based on the standard library's [`str::is_char_boundary`].
#[inline]
const fn is_char_start(byte: u8) -> bool {
    // Continuation bytes are exactly those of the form 0b10xx_xxxx (128..=191), so this is
    // equivalent to: byte < 128 || byte >= 192
    (byte & 0b1100_0000) != 0b1000_0000
}
125
/// An error indicating that there was no capacity remaining when a push was attempted.
///
/// You should [`MapInPlace::pop`] more characters in order to make room for a push.
///
/// Keep in mind that not every UTF-8 character is the same size, so you may get this error even if
/// you always have more pops than pushes, if you are pushing larger characters.
///
/// # Examples
///
/// ```rust
/// use in_place_string_map::{MapInPlace, NoCapacityError};
///
/// let mut string = String::from("$");
/// let mut map = MapInPlace::new(&mut string);
///
/// map.pop();
///
/// let error: NoCapacityError = map.push('£').unwrap_err();
/// assert_eq!(error.to_string(), "not enough capacity remaining to push");
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NoCapacityError(());

impl core::fmt::Display for NoCapacityError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("not enough capacity remaining to push")
    }
}
147
/// Threshold, in bytes, that decides how a section of the string is zeroed to keep it valid
/// UTF-8.
///
/// A section no longer than this is simply zeroed in full with a `.fill()`. A section longer
/// than this is zeroed only as far as needed, stopping as early as possible while still
/// ensuring valid UTF-8.
const PARTIAL_ZERO_SIZE: usize = 32_usize;
153
154impl<'a> MapInPlace<'a> {
155 /// Creates a new `MapInPlace`, used to do in-place string conversions without allocating a new
156 /// buffer.
157 ///
158 /// ```rust
159 /// let mut string = String::from("Hello, World!");
160 /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
161 /// ```
162 pub fn new(s: &'a mut str) -> Self {
163 // Safety:
164 //
165 // When this borrow ends (MapInPlace is dropped/forgotten), the string must be valid UTF-8.
166 // We also need to never expose invalid UTF-8 to the user.
167 let buf = unsafe { s.as_bytes_mut() };
168
169 MapInPlace {
170 buf,
171 mapped_head: 0,
172 unmapped_head: 0,
173 }
174 }
175
176 /// Returns the mapped portion of the string.
177 ///
178 /// ```rust
179 /// let mut string = String::from("Hello, World!");
180 /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
181 ///
182 /// assert_eq!(map.mapped(), "");
183 ///
184 /// map.pop_chars(6);
185 ///
186 /// map.push_str("Yellow");
187 ///
188 /// assert_eq!(map.mapped(), "Yellow");
189 /// ```
190 #[must_use]
191 pub fn mapped(&self) -> &str {
192 debug_assert!(self.buf.get(0..self.mapped_head).is_some());
193
194 // Safety: self.mapped_head has the invariant that it is always in bounds of `buf`.
195 let bytes = unsafe { self.buf.get_unchecked(0..self.mapped_head) };
196
197 unsafe { from_utf8_unchecked(bytes) }
198 }
199
200 /// Consumes this [`MapInPlace`] and returns the mapped slice of the original string with the
201 /// original lifetime.
202 ///
203 /// This is useful for when you want the lifetime of the returned string to outlive the
204 /// instance of [`MapInPlace`].
205 ///
206 /// ```rust
207 /// fn push_yellow(s: &mut str) -> &mut str {
208 /// let mut map = in_place_string_map::MapInPlace::new(s);
209 /// map.pop_chars(6);
210 /// map.push_str("Yellow");
211 /// map.into_mapped()
212 /// }
213 ///
214 /// let mut string = String::from("Hello, World!");
215 /// let result = push_yellow(&mut string);
216 /// assert_eq!(result, "Yellow");
217 /// ```
218 ///
219 /// You cannot simply use [`MapInPlace::mapped`] because that will return a reference that
220 /// can't outlive the original [`MapInPlace`]
221 ///
222 /// ```compile_fail
223 /// fn push_yellow(s: &mut str) -> &str {
224 /// let mut map = in_place_string_map::MapInPlace::new(s);
225 /// map.pop_chars(6);
226 /// map.push_str("Yellow");
227 ///
228 /// // cannot return value referencing local variable `map`
229 /// map.mapped()
230 /// }
231 ///
232 /// let mut string = String::from("Hello, World!");
233 /// let result = push_yellow(&mut string);
234 /// assert_eq!(result, "Yellow");
235 /// ```
236 #[must_use]
237 pub fn into_mapped(self) -> &'a mut str {
238 let mapped_head = self.mapped_head;
239
240 &mut self.into_all()[0..mapped_head]
241 }
242
243 /// Returns the not yet mapped portion of the string.
244 ///
245 /// ```rust
246 /// let mut string = String::from("Hello, World!");
247 /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
248 ///
249 /// assert_eq!(map.unmapped(), "Hello, World!");
250 ///
251 /// map.pop_chars(5);
252 ///
253 /// assert_eq!(map.unmapped(), ", World!");
254 /// ```
255 #[must_use]
256 pub fn unmapped(&self) -> &str {
257 &self.all()[self.unmapped_head..]
258 }
259
260 /// Consumes this [`MapInPlace`] and returns the unmapped slice of the original string with the
261 /// original lifetime.
262 ///
263 /// This is useful for when you want the lifetime of the returned string to outlive the
264 /// instance of [`MapInPlace`].
265 ///
266 /// ```rust
267 /// fn pop_five(s: &mut str) -> &mut str {
268 /// let mut map = in_place_string_map::MapInPlace::new(s);
269 /// map.pop_chars(5);
270 /// map.into_unmapped()
271 /// }
272 ///
273 /// let mut string = String::from("Hello, World!");
274 /// let result = pop_five(&mut string);
275 /// assert_eq!(result, ", World!");
276 /// ```
277 ///
278 /// You cannot simply use [`MapInPlace::mapped`] because that will return a reference that
279 /// can't outlive the original [`MapInPlace`]
280 ///
281 /// ```compile_fail
282 /// fn pop_five(s: &mut str) -> &str {
283 /// let mut map = in_place_string_map::MapInPlace::new(s);
284 /// map.pop_chars(5);
285 ///
286 /// // cannot return value referencing local variable `map`
287 /// map.unmapped()
288 /// }
289 ///
290 /// let mut string = String::from("Hello, World!");
291 /// let result = pop_five(&mut string);
292 /// assert_eq!(result, ", World!");
293 /// ```
294 #[must_use]
295 pub fn into_unmapped(self) -> &'a mut str {
296 let unmapped_head = self.unmapped_head;
297
298 &mut self.into_all()[unmapped_head..]
299 }
300
301 #[must_use]
302 fn all(&self) -> &str {
303 // Safety: self.buf is always valid UTF-8 if the user has access to it, so this is safe.
304 unsafe { from_utf8_unchecked(&self.buf[..]) }
305 }
306
307 #[must_use]
308 fn into_all(self) -> &'a mut str {
309 // Safety: self.buf is always valid UTF-8 if the user has access to it, so this is safe.
310 unsafe { from_utf8_unchecked_mut(self.buf) }
311 }
312
313 /// Pushes a character onto the end of the mapped portion.
314 ///
315 /// ```rust
316 /// let mut string = String::from("Hello!");
317 /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
318 /// map.pop_chars(6);
319 ///
320 /// assert_eq!(map.mapped(), "");
321 /// map.push('£').unwrap();
322 /// map.push('1').unwrap();
323 /// map.push('.').unwrap();
324 /// map.push('2').unwrap();
325 /// map.push('5').unwrap();
326 ///
327 /// assert_eq!(map.mapped(), "£1.25");
328 ///
329 /// map.push('5').unwrap_err();
330 ///
331 /// assert_eq!(map.mapped(), "£1.25");
332 /// ```
333 ///
334 /// # Errors
335 ///
336 /// * [`NoCapacityError`]: If there is not enough room to fit `ch` being pushed.
337 pub fn push(&mut self, ch: char) -> Result<(), NoCapacityError> {
338 let mut tempbuf = [0_u8; 4_usize];
339
340 let sbytes = ch.encode_utf8(&mut tempbuf);
341
342 self.push_str(sbytes)?;
343
344 Ok(())
345 }
346
347 /// Pushes a string onto the end of the mapped portion. If the string is too long, an error is
348 /// returned, and no changes will be made to the input.
349 ///
350 /// ```rust
351 /// let mut string = String::from("Hello!");
352 /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
353 /// map.pop_chars(6);
354 ///
355 /// assert_eq!(map.mapped(), "");
356 ///
357 /// map.push_str("This string is *far* too long!").unwrap_err();
358 ///
359 /// assert_eq!(map.mapped(), "");
360 ///
361 /// map.push_str("Short").unwrap();
362 ///
363 /// assert_eq!(map.mapped(), "Short");
364 ///
365 /// map.push_str(".").unwrap();
366 ///
367 /// assert_eq!(map.mapped(), "Short.");
368 /// ```
369 ///
370 /// # Errors
371 ///
372 /// * [`NoCapacityError`]: If there is not enough room to fit `s` being pushed.
373 pub fn push_str(&mut self, s: &str) -> Result<(), NoCapacityError> {
374 let bytes = s.as_bytes();
375
376 if self.buf.len() < self.mapped_head + bytes.len() {
377 return Err(NoCapacityError(()));
378 }
379
380 if self.unmapped_head < self.mapped_head + bytes.len() {
381 return Err(NoCapacityError(()));
382 }
383
384 // Safety: self.buf must be valid UTF-8 once this ends.
385 //
386 // It consists of:
387 // ..mapped_head, which is a `str` and we only push valid strs onto it
388 // mapped_head..unmapped_head, which consists of the previous contents of the str
389 // where an unspecified amount is zeroed
390 // unmapped_head.., which is a `str` and we only pop chars from it
391 self.buf[self.mapped_head..self.mapped_head + bytes.len()].copy_from_slice(bytes);
392
393 self.mapped_head += bytes.len();
394 debug_assert!(self.mapped_head <= self.unmapped_head);
395
396 let area_to_zero = &mut self.buf[self.mapped_head..self.unmapped_head];
397
398 if area_to_zero.len() > PARTIAL_ZERO_SIZE {
399 for byte in area_to_zero {
400 if is_char_start(*byte) {
401 break;
402 }
403 *byte = 0;
404 }
405 } else {
406 area_to_zero.fill(0);
407 }
408
409 Ok(())
410 }
411
412 /// Pops a character from the start of the unmapped portion
413 ///
414 /// Will return [`None`] if there are no more characters left to pop.
415 ///
416 /// ```rust
417 /// let mut string = String::from("Hi!");
418 /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
419 ///
420 /// assert_eq!(map.pop(), Some('H'));
421 /// assert_eq!(map.unmapped(), "i!");
422 ///
423 /// assert_eq!(map.pop(), Some('i'));
424 /// assert_eq!(map.unmapped(), "!");
425 ///
426 /// assert_eq!(map.pop(), Some('!'));
427 /// assert_eq!(map.unmapped(), "");
428 ///
429 /// assert_eq!(map.pop(), None);
430 /// assert_eq!(map.unmapped(), "");
431 /// ```
432 pub fn pop(&mut self) -> Option<char> {
433 self.pop_chars(1)
434 .map(|x| x.chars().next().expect("pop_chars did not pop a char"))
435 }
436
437 /// Pops `n` characters from the start of the unmapped portion.
438 ///
439 /// Note how this pops in terms of *characters*, not bytes.
440 ///
441 /// If `n` is 0 then will always return [`None`]
442 ///
443 /// If this fails because there are not enough characters then will return [`None`], and no
444 /// changes will have been made to `self`.
445 ///
446 /// ```rust
447 /// let mut string = String::from("A £3.00 sandwich");
448 /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
449 ///
450 /// assert_eq!(map.pop_chars(0), None);
451 /// assert_eq!(map.pop_chars(2), Some("A "));
452 /// assert_eq!(map.pop_chars(5), Some("£3.00"));
453 ///
454 /// // Nothing is done if you try to pop too many characters
455 /// assert_eq!(map.pop_chars(10), None);
456 ///
457 /// assert_eq!(map.pop_chars(9), Some(" sandwich"));
458 /// ```
459 pub fn pop_chars(&mut self, n: usize) -> Option<&str> {
460 if n == 0 {
461 return None;
462 }
463
464 let (idx, c) = self.unmapped().char_indices().nth(n - 1)?;
465
466 let to_take = idx + c.len_utf8();
467
468 let s = &self.buf[self.unmapped_head..self.unmapped_head + to_take];
469
470 self.unmapped_head += to_take;
471
472 // Safety of from_utf8_unchecked:
473 //
474 // We slice the buffer starting at the original value for self.unmapped_head, which must be
475 // on a char boundary, this is an invariant of the type for safety (otherwise,
476 // self.unmapped() would create invalid UTF-8).
477 //
478 // self.unmapped_head is incremented by to_take, which also leaves it on a char boundary,
479 // because `idx` is on a char boundary when looked at relative to self.unmapped_head (since
480 // we got it from char_indices on self.unmapped())
481 //
482 // We add c.len_utf8() on top, which makes to_take on a char boundary relative to
483 // self.unmapped_head.
484 //
485 // Therefore, `s` is always valid UTF-8.
486
487 // We also need to keep the invariant that self.unmapped_head is always on a char boundary.
488 // But we already know it must be, since we're adding `to_take` to it, which the argument
489 // above proves is on a char boundary.
490 unsafe { Some(from_utf8_unchecked(s)) }
491 }
492}
493
// Source: https://github.com/rust-lang/cargo/issues/383#issuecomment-720873790
//
// Only compiled under `cfg(doctest)`: the README is attached as the documentation of a dummy
// item so that its code blocks are picked up as doctests.
#[cfg(doctest)]
mod test_readme {
    // Attaches the given string expression as the doc comment of an empty `extern` block.
    macro_rules! doctest_from_text {
        ($text:expr) => {
            #[doc = $text]
            extern "C" {}
        };
    }

    doctest_from_text!(include_str!("../README.md"));
}
506
#[cfg(test)]
mod tests {
    use super::*;

    /// Asking for more characters than remain must fail cleanly: no panic, no characters
    /// consumed, and the underlying string left untouched.
    #[test]
    fn cannot_remove_from_end() {
        let mut initial = "㉉".to_string();
        let mut mapper = MapInPlace::new(&mut initial);

        assert_eq!(mapper.pop_chars(3), None);

        // A failed pop must not have changed either portion.
        assert_eq!(mapper.unmapped(), "㉉");
        assert_eq!(mapper.mapped(), "");

        assert_eq!(initial, "㉉");
    }
}
517}