in_place_string_map/
lib.rs

1//! `in_place_string_map` is a library for doing string manipulation in place.
2//!
3//! Normally in Rust, if you wanted to handle escapes, for example, you'd need to either map to a
4//! new String, causing allocations, or do `.remove` and `.insert` calls on a String, which
5//! wouldn't cause reallocations if you never grow the String, but *would* cause slowdowns on large
6//! strings due to the need to backshift elements on every item.
7//!
8//! Here, you can just do
9//!
10//! ```rust
11//! use in_place_string_map::MapInPlace;
12//!
13//! fn decode_percent(s: &mut str) -> &mut str {
14//!     let mut m = MapInPlace::new(s);
15//!
16//!     while let Some(c) = m.pop() {
17//!         match c {
18//!             '%' => {
19//!                 let num = m.pop_chars(2).expect("not enough chars");
20//!                 let n = u8::from_str_radix(num, 16).expect("invalid hex");
21//!                 m.push(n as char).expect("no more capacity");
22//!             }
23//!             _ => {
24//!                 m.push(c).expect("no more capacity");
25//!             }
26//!         }
27//!     }
28//!
29//!     m.into_mapped()
30//! }
31//!
32//! let mut input = String::from("%54r%61ns %52igh%74%73%21");
33//!
34//! assert_eq!(decode_percent(&mut input), "Trans Rights!");
35//! ```
36//!
37//! ## Safety
38//!
39//! This library takes care to ensure that the input string is always left in a valid state.
40//!
//! Since [`core::mem::forget`] is safe, no code can soundly rely on users to call destructors. The
//! contents of the original borrowed string after any operation are generally left unspecified,
//! but they are guaranteed to always be valid UTF-8.
44
45// https://twitter.com/reduct_rs/status/1387153973010829315
46#![cfg_attr(all(not(test)), no_std)]
47#![warn(clippy::all)]
48#![warn(clippy::pedantic)]
49#![warn(clippy::nursery)]
50#![warn(clippy::cargo)]
51#![warn(missing_docs)]
52
/// Reinterprets `input` as a `&str` without validating it in release builds.
///
/// Debug builds additionally assert that `input` really is valid UTF-8, panicking with the
/// offending bytes if it is not.
///
/// # Safety
///
/// Same contract as [`core::str::from_utf8_unchecked`]: `input` must be valid UTF-8.
unsafe fn from_utf8_unchecked(input: &[u8]) -> &str {
    // Only evaluated when debug assertions are enabled, so release builds skip the validation.
    debug_assert!(
        core::str::from_utf8(input).is_ok(),
        "{:?} was invalid UTF-8",
        input
    );

    // SAFETY: the caller promises that `input` is valid UTF-8.
    unsafe { core::str::from_utf8_unchecked(input) }
}
64
/// Reinterprets `input` as a `&mut str` without validating it in release builds.
///
/// Debug builds additionally assert that `input` really is valid UTF-8, panicking with the
/// offending bytes if it is not.
///
/// # Safety
///
/// Same contract as [`core::str::from_utf8_unchecked_mut`]: `input` must be valid UTF-8.
unsafe fn from_utf8_unchecked_mut(input: &mut [u8]) -> &mut str {
    // Only evaluated when debug assertions are enabled, so release builds skip the validation.
    debug_assert!(
        core::str::from_utf8(&*input).is_ok(),
        "{:?} was invalid UTF-8",
        input
    );

    // SAFETY: the caller promises that `input` is valid UTF-8.
    unsafe { core::str::from_utf8_unchecked_mut(input) }
}
76
/// A view over a `&mut str` that supports in-place pushes and pops while keeping the underlying
/// bytes valid UTF-8 at all times.
///
/// Conceptually the borrowed string is split into two buffers:
///
/// * the *mapped* buffer, which starts out empty and grows at its end when you push, and
/// * the *unmapped* buffer, which starts out as the whole string handed to [`MapInPlace::new`]
///   and shrinks from its start when you pop.
///
/// Together the two buffers can never be larger than the original string; a push that would
/// exceed that returns an error.
///
/// They may be smaller, though. In that case there is a region between them whose contents are
/// unspecified, but which is still valid UTF-8 so that safety is preserved.
#[derive(Debug)]
pub struct MapInPlace<'a> {
    /// The bytes of the borrowed string.
    ///
    /// Invariants:
    ///
    /// * Must be valid UTF-8 whenever the [`MapInPlace`] is exposed to code outside this crate.
    buf: &'a mut [u8],

    /// Length in bytes of the mapped buffer, which occupies `0..mapped_head` of `buf`.
    ///
    /// Invariants:
    ///
    /// * `0..mapped_head` must always be in bounds for `buf`
    /// * `0..mapped_head` must always be valid UTF-8 (when exposed to code outside this crate)
    /// * `mapped_head <= unmapped_head`
    mapped_head: usize,

    /// Byte offset in `buf` at which the unmapped buffer starts; it runs to the end of `buf`.
    ///
    /// Invariants:
    ///
    /// * `unmapped_head` must be in bounds
    /// * `unmapped_head..` must always be in bounds for `buf`
    /// * `unmapped_head` must lie on a char boundary
    unmapped_head: usize,
}
113
/// Returns `true` if `byte` can be the first byte of a UTF-8 encoded code point, i.e. it is not
/// a continuation byte.
///
/// Based on the standard library's [`str::is_char_boundary`].
#[inline]
const fn is_char_start(byte: u8) -> bool {
    // Continuation bytes are exactly those of the form `0b10xx_xxxx` (0x80..=0xBF); every other
    // byte (`< 128 || >= 192`) starts a sequence.
    (byte & 0b1100_0000) != 0b1000_0000
}
125
/// An error indicating that there was no capacity remaining when a push was attempted.
///
/// You should [`MapInPlace::pop`] more characters in order to make room for a push.
///
/// Keep in mind that not every UTF-8 character is the same size, so you may get this error even if
/// you always have more pops than pushes, if you are pushing larger characters.
///
/// The error implements [`core::fmt::Display`] so it can be reported to users, and the usual
/// comparison traits so that results of pushes can be compared directly (for example in
/// `assert_eq!`).
///
/// # Examples
///
/// ```rust
/// use in_place_string_map::{MapInPlace, NoCapacityError};
///
/// let mut string = String::from("$");
/// let mut map = MapInPlace::new(&mut string);
///
/// map.pop();
///
/// let error: NoCapacityError = map.push('£').unwrap_err();
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NoCapacityError(());

impl core::fmt::Display for NoCapacityError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str("not enough capacity remaining to push onto the mapped buffer")
    }
}
147
// Threshold, in bytes, that decides how a push zeroes the gap between the mapped and unmapped
// portions of the string to ensure valid UTF-8.
//
// A gap of at most this many bytes is simply zeroed in full with a .fill(), rather than trying
// to stop early.
//
// Anything larger will try to stop as early as it can and still ensure valid UTF-8: only the
// leading bytes that are not the start of a UTF-8 sequence get zeroed.
const PARTIAL_ZERO_SIZE: usize = 32;
153
154impl<'a> MapInPlace<'a> {
155    /// Creates a new `MapInPlace`, used to do in-place string conversions without allocating a new
156    /// buffer.
157    ///
158    /// ```rust
159    /// let mut string = String::from("Hello, World!");
160    /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
161    /// ```
162    pub fn new(s: &'a mut str) -> Self {
163        // Safety:
164        //
165        // When this borrow ends (MapInPlace is dropped/forgotten), the string must be valid UTF-8.
166        // We also need to never expose invalid UTF-8 to the user.
167        let buf = unsafe { s.as_bytes_mut() };
168
169        MapInPlace {
170            buf,
171            mapped_head: 0,
172            unmapped_head: 0,
173        }
174    }
175
176    /// Returns the mapped portion of the string.
177    ///
178    /// ```rust
179    /// let mut string = String::from("Hello, World!");
180    /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
181    ///
182    /// assert_eq!(map.mapped(), "");
183    ///
184    /// map.pop_chars(6);
185    ///
186    /// map.push_str("Yellow");
187    ///
188    /// assert_eq!(map.mapped(), "Yellow");
189    /// ```
190    #[must_use]
191    pub fn mapped(&self) -> &str {
192        debug_assert!(self.buf.get(0..self.mapped_head).is_some());
193
194        // Safety: self.mapped_head has the invariant that it is always in bounds of `buf`.
195        let bytes = unsafe { self.buf.get_unchecked(0..self.mapped_head) };
196
197        unsafe { from_utf8_unchecked(bytes) }
198    }
199
200    /// Consumes this [`MapInPlace`] and returns the mapped slice of the original string with the
201    /// original lifetime.
202    ///
203    /// This is useful for when you want the lifetime of the returned string to outlive the
204    /// instance of [`MapInPlace`].
205    ///
206    /// ```rust
207    /// fn push_yellow(s: &mut str) -> &mut str {
208    ///     let mut map = in_place_string_map::MapInPlace::new(s);
209    ///     map.pop_chars(6);
210    ///     map.push_str("Yellow");
211    ///     map.into_mapped()
212    /// }
213    ///
214    /// let mut string = String::from("Hello, World!");
215    /// let result = push_yellow(&mut string);
216    /// assert_eq!(result, "Yellow");
217    /// ```
218    ///
219    /// You cannot simply use [`MapInPlace::mapped`] because that will return a reference that
220    /// can't outlive the original [`MapInPlace`]
221    ///
222    /// ```compile_fail
223    /// fn push_yellow(s: &mut str) -> &str {
224    ///     let mut map = in_place_string_map::MapInPlace::new(s);
225    ///     map.pop_chars(6);
226    ///     map.push_str("Yellow");
227    ///
228    ///     // cannot return value referencing local variable `map`
229    ///     map.mapped()
230    /// }
231    ///
232    /// let mut string = String::from("Hello, World!");
233    /// let result = push_yellow(&mut string);
234    /// assert_eq!(result, "Yellow");
235    /// ```
236    #[must_use]
237    pub fn into_mapped(self) -> &'a mut str {
238        let mapped_head = self.mapped_head;
239
240        &mut self.into_all()[0..mapped_head]
241    }
242
243    /// Returns the not yet mapped portion of the string.
244    ///
245    /// ```rust
246    /// let mut string = String::from("Hello, World!");
247    /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
248    ///
249    /// assert_eq!(map.unmapped(), "Hello, World!");
250    ///
251    /// map.pop_chars(5);
252    ///
253    /// assert_eq!(map.unmapped(), ", World!");
254    /// ```
255    #[must_use]
256    pub fn unmapped(&self) -> &str {
257        &self.all()[self.unmapped_head..]
258    }
259
260    /// Consumes this [`MapInPlace`] and returns the unmapped slice of the original string with the
261    /// original lifetime.
262    ///
263    /// This is useful for when you want the lifetime of the returned string to outlive the
264    /// instance of [`MapInPlace`].
265    ///
266    /// ```rust
267    /// fn pop_five(s: &mut str) -> &mut str {
268    ///     let mut map = in_place_string_map::MapInPlace::new(s);
269    ///     map.pop_chars(5);
270    ///     map.into_unmapped()
271    /// }
272    ///
273    /// let mut string = String::from("Hello, World!");
274    /// let result = pop_five(&mut string);
275    /// assert_eq!(result, ", World!");
276    /// ```
277    ///
278    /// You cannot simply use [`MapInPlace::mapped`] because that will return a reference that
279    /// can't outlive the original [`MapInPlace`]
280    ///
281    /// ```compile_fail
282    /// fn pop_five(s: &mut str) -> &str {
283    ///     let mut map = in_place_string_map::MapInPlace::new(s);
284    ///     map.pop_chars(5);
285    ///
286    ///     // cannot return value referencing local variable `map`
287    ///     map.unmapped()
288    /// }
289    ///
290    /// let mut string = String::from("Hello, World!");
291    /// let result = pop_five(&mut string);
292    /// assert_eq!(result, ", World!");
293    /// ```
294    #[must_use]
295    pub fn into_unmapped(self) -> &'a mut str {
296        let unmapped_head = self.unmapped_head;
297
298        &mut self.into_all()[unmapped_head..]
299    }
300
301    #[must_use]
302    fn all(&self) -> &str {
303        // Safety: self.buf is always valid UTF-8 if the user has access to it, so this is safe.
304        unsafe { from_utf8_unchecked(&self.buf[..]) }
305    }
306
307    #[must_use]
308    fn into_all(self) -> &'a mut str {
309        // Safety: self.buf is always valid UTF-8 if the user has access to it, so this is safe.
310        unsafe { from_utf8_unchecked_mut(self.buf) }
311    }
312
313    /// Pushes a character onto the end of the mapped portion.
314    ///
315    /// ```rust
316    /// let mut string = String::from("Hello!");
317    /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
318    /// map.pop_chars(6);
319    ///
320    /// assert_eq!(map.mapped(), "");
321    /// map.push('£').unwrap();
322    /// map.push('1').unwrap();
323    /// map.push('.').unwrap();
324    /// map.push('2').unwrap();
325    /// map.push('5').unwrap();
326    ///
327    /// assert_eq!(map.mapped(), "£1.25");
328    ///
329    /// map.push('5').unwrap_err();
330    ///
331    /// assert_eq!(map.mapped(), "£1.25");
332    /// ```
333    ///
334    /// # Errors
335    ///
336    /// * [`NoCapacityError`]: If there is not enough room to fit `ch` being pushed.
337    pub fn push(&mut self, ch: char) -> Result<(), NoCapacityError> {
338        let mut tempbuf = [0_u8; 4_usize];
339
340        let sbytes = ch.encode_utf8(&mut tempbuf);
341
342        self.push_str(sbytes)?;
343
344        Ok(())
345    }
346
347    /// Pushes a string onto the end of the mapped portion. If the string is too long, an error is
348    /// returned, and no changes will be made to the input.
349    ///
350    /// ```rust
351    /// let mut string = String::from("Hello!");
352    /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
353    /// map.pop_chars(6);
354    ///
355    /// assert_eq!(map.mapped(), "");
356    ///
357    /// map.push_str("This string is *far* too long!").unwrap_err();
358    ///
359    /// assert_eq!(map.mapped(), "");
360    ///
361    /// map.push_str("Short").unwrap();
362    ///
363    /// assert_eq!(map.mapped(), "Short");
364    ///
365    /// map.push_str(".").unwrap();
366    ///
367    /// assert_eq!(map.mapped(), "Short.");
368    /// ```
369    ///
370    /// # Errors
371    ///
372    /// * [`NoCapacityError`]: If there is not enough room to fit `s` being pushed.
373    pub fn push_str(&mut self, s: &str) -> Result<(), NoCapacityError> {
374        let bytes = s.as_bytes();
375
376        if self.buf.len() < self.mapped_head + bytes.len() {
377            return Err(NoCapacityError(()));
378        }
379
380        if self.unmapped_head < self.mapped_head + bytes.len() {
381            return Err(NoCapacityError(()));
382        }
383
384        // Safety: self.buf must be valid UTF-8 once this ends.
385        //
386        // It consists of:
387        // ..mapped_head, which is a `str` and we only push valid strs onto it
388        // mapped_head..unmapped_head, which consists of the previous contents of the str
389        //   where an unspecified amount is zeroed
390        // unmapped_head.., which is a `str` and we only pop chars from it
391        self.buf[self.mapped_head..self.mapped_head + bytes.len()].copy_from_slice(bytes);
392
393        self.mapped_head += bytes.len();
394        debug_assert!(self.mapped_head <= self.unmapped_head);
395
396        let area_to_zero = &mut self.buf[self.mapped_head..self.unmapped_head];
397
398        if area_to_zero.len() > PARTIAL_ZERO_SIZE {
399            for byte in area_to_zero {
400                if is_char_start(*byte) {
401                    break;
402                }
403                *byte = 0;
404            }
405        } else {
406            area_to_zero.fill(0);
407        }
408
409        Ok(())
410    }
411
412    /// Pops a character from the start of the unmapped portion
413    ///
414    /// Will return [`None`] if there are no more characters left to pop.
415    ///
416    /// ```rust
417    /// let mut string = String::from("Hi!");
418    /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
419    ///
420    /// assert_eq!(map.pop(), Some('H'));
421    /// assert_eq!(map.unmapped(), "i!");
422    ///
423    /// assert_eq!(map.pop(), Some('i'));
424    /// assert_eq!(map.unmapped(), "!");
425    ///
426    /// assert_eq!(map.pop(), Some('!'));
427    /// assert_eq!(map.unmapped(), "");
428    ///
429    /// assert_eq!(map.pop(), None);
430    /// assert_eq!(map.unmapped(), "");
431    /// ```
432    pub fn pop(&mut self) -> Option<char> {
433        self.pop_chars(1)
434            .map(|x| x.chars().next().expect("pop_chars did not pop a char"))
435    }
436
437    /// Pops `n` characters from the start of the unmapped portion.
438    ///
439    /// Note how this pops in terms of *characters*, not bytes.
440    ///
441    /// If `n` is 0 then will always return [`None`]
442    ///
443    /// If this fails because there are not enough characters then will return [`None`], and no
444    /// changes will have been made to `self`.
445    ///
446    /// ```rust
447    /// let mut string = String::from("A £3.00 sandwich");
448    /// let mut map = in_place_string_map::MapInPlace::new(&mut string);
449    ///
450    /// assert_eq!(map.pop_chars(0), None);
451    /// assert_eq!(map.pop_chars(2), Some("A "));
452    /// assert_eq!(map.pop_chars(5), Some("£3.00"));
453    ///
454    /// // Nothing is done if you try to pop too many characters
455    /// assert_eq!(map.pop_chars(10), None);
456    ///
457    /// assert_eq!(map.pop_chars(9), Some(" sandwich"));
458    /// ```
459    pub fn pop_chars(&mut self, n: usize) -> Option<&str> {
460        if n == 0 {
461            return None;
462        }
463
464        let (idx, c) = self.unmapped().char_indices().nth(n - 1)?;
465
466        let to_take = idx + c.len_utf8();
467
468        let s = &self.buf[self.unmapped_head..self.unmapped_head + to_take];
469
470        self.unmapped_head += to_take;
471
472        // Safety of from_utf8_unchecked:
473        //
474        // We slice the buffer starting at the original value for self.unmapped_head, which must be
475        // on a char boundary, this is an invariant of the type for safety (otherwise,
476        // self.unmapped() would create invalid UTF-8).
477        //
478        // self.unmapped_head is incremented by to_take, which also leaves it on a char boundary,
479        // because `idx` is on a char boundary when looked at relative to self.unmapped_head (since
480        // we got it from char_indices on self.unmapped())
481        //
482        // We add c.len_utf8() on top, which makes to_take on a char boundary relative to
483        // self.unmapped_head.
484        //
485        // Therefore, `s` is always valid UTF-8.
486
487        // We also need to keep the invariant that self.unmapped_head is always on a char boundary.
488        // But we already know it must be, since we're adding `to_take` to it, which the argument
489        // above proves is on a char boundary.
490        unsafe { Some(from_utf8_unchecked(s)) }
491    }
492}
493
// Source: https://github.com/rust-lang/cargo/issues/383#issuecomment-720873790
//
// Only compiled under `cfg(doctest)`: attaches the contents of `../README.md` as documentation to
// an empty `extern "C"` block, so that rustdoc sees the README's code blocks as doctests.
#[cfg(doctest)]
mod test_readme {
    // Expands to an empty `extern "C"` block whose doc attribute is the expression `$x`.
    macro_rules! external_doc_test {
        ($x:expr) => {
            #[doc = $x]
            extern "C" {}
        };
    }

    // The README path is relative to this source file.
    external_doc_test!(include_str!("../README.md"));
}
506
#[cfg(test)]
mod tests {
    use super::*;

    /// Asking for more characters than remain must not panic: it returns `None` and leaves both
    /// the mapped and unmapped portions untouched.
    #[test]
    fn cannot_remove_from_end() {
        let mut initial = "㉉".to_string();
        let mut mapper = MapInPlace::new(&mut initial);

        // The string holds a single (multi-byte) character, so three cannot be popped.
        assert_eq!(mapper.pop_chars(3), None);

        // A failed pop makes no changes.
        assert_eq!(mapper.unmapped(), "㉉");
        assert_eq!(mapper.mapped(), "");
    }
}
517}