strseq/
mutable.rs

1use crate::{
2    base_trait::{retr, up, StringSequenceView, ToRange},
3    view::SharedStringSequence,
4    StringSequence,
5};
6
/// An editable sequence of strings kept in a single byte buffer.
///
/// Every string of the sequence is described by a byte range into the shared buffer.
/// The original documentation states that this type is used to represent a path.
#[derive(Clone, Default)]
pub struct MutableStringSequence {
    /// Bytes of all strings in the sequence, stored in one buffer.
    text: Vec<u8>,
    /// For each string, in sequence order, the byte range it occupies inside `text`.
    index: Vec<std::ops::Range<u32>>,
}
15
16impl StringSequenceView for MutableStringSequence {
17    fn inner(&self) -> (&[u8], &[std::ops::Range<u32>]) {
18        (&self.text, &self.index)
19    }
20}
21
22/* ---------------------------------------- Vwr Impl ---------------------------------------- */
// Expands the crate's `impl_seq_view!` macro for this type. The macro is defined outside this
// file; presumably it generates the shared view-style trait impls — TODO confirm against the
// macro definition.
impl_seq_view!(MutableStringSequence);
24
25/* ---------------------------------------------------------------------------------------------- */
26/*                                          MANIPULATION                                          */
27/* ---------------------------------------------------------------------------------------------- */
28
29impl MutableStringSequence {
30    /// Create a new empty sequence.
31    pub fn new() -> Self {
32        Self::default()
33    }
34
35    /// Create new instance with capacities
36    pub fn with_capacity(num_tokens: usize, num_chars: usize) -> Self {
37        Self { text: Vec::with_capacity(num_chars), index: Vec::with_capacity(num_tokens) }
38    }
39
40    /// Token array capacity
41    pub fn token_capacity(&self) -> usize {
42        self.index.capacity()
43    }
44
45    /// Text buffer capacity
46    pub fn text_capacity(&self) -> usize {
47        self.text.capacity()
48    }
49
50    /// Reserve space for internal string container.
51    ///
52    /// NOTE: Consider delimiter length when reserving space.
53    pub fn reserve_buffer(&mut self, num_chars: usize) {
54        self.text.reserve(num_chars);
55    }
56
57    /// Reserve space for internal index container. Index container indicates the number of
58    /// `tokens` that can be appended without reallocation.
59    pub fn reserve_index(&mut self, num_tokens: usize) {
60        self.index.reserve(num_tokens);
61    }
62
63    /// Add list of references to the internal buffer.
64    pub fn extend_from_slice<T: AsRef<str>>(&mut self, slice: &[T]) {
65        let buffer_len = slice.iter().fold(0, |acc, s| acc + s.as_ref().len());
66        self.reserve_buffer(buffer_len);
67        self.reserve_index(slice.len());
68
69        let mut offset = self.text.len();
70        for s in slice {
71            let s = s.as_ref();
72            self.index.push(offset as _..(offset + s.len()) as _);
73            self.text.extend_from_slice(s.as_bytes());
74            offset = self.text.len();
75        }
76    }
77
78    /// Append a string to the internal buffer. As we can't pre-calculate required space for
79    /// text buffer, this is inherently inefficient compared to [`Self::extend_from_slice`].
80    pub fn extend<T: AsRef<str>>(&mut self, into_iter: impl IntoIterator<Item = T>) {
81        let iter = into_iter.into_iter();
82        let num_elem_hint = iter.size_hint().0;
83
84        self.reserve_index(num_elem_hint);
85        iter.for_each(|s| self.push_back(&s));
86    }
87
88    /// Remove the string at the specified index.
89    ///
90    /// # Panics
91    ///
92    /// Panics if the index is out of bounds.
93    pub fn remove(&mut self, index: usize) {
94        let range = self.index.remove(index);
95        self.index[index..].iter_mut().for_each(|x| {
96            x.start -= range.len() as u32;
97            x.end -= range.len() as u32;
98        });
99        self.text.drain(up(range));
100    }
101
102    /// Remove the last string quickly.
103    pub fn pop_back(&mut self) {
104        let range = self.index.pop().unwrap();
105        self.text.drain(up(range));
106    }
107
108    /// Append a string to the end of the sequence.
109    pub fn push_back(&mut self, value: impl AsRef<str>) {
110        let value = value.as_ref();
111        let offset = self.text.len();
112        self.index.push(offset as _..(offset + value.len()) as _);
113        self.text.extend_from_slice(value.as_bytes());
114    }
115
116    /// Insert a string at the specified index.
117    pub fn insert(&mut self, index: usize, value: impl AsRef<str>) {
118        let value = value.as_ref().as_bytes();
119        let insert_offset = self.index.get(index).map(|x| x.start).unwrap_or(self.text.len() as _);
120        let offset = insert_offset as usize;
121
122        self.index[index..].iter_mut().for_each(|x| {
123            x.start += value.len() as u32;
124            x.end += value.len() as u32;
125        });
126        self.index.insert(index, offset as _..(offset + value.len()) as _);
127
128        self.text.splice(offset..offset, value.iter().copied());
129    }
130
131    pub fn clear(&mut self) {
132        self.text.clear();
133        self.index.clear();
134    }
135
136    pub fn drain(&mut self, range: impl ToRange) -> Drain {
137        let self_ptr = self as *mut _;
138
139        let range = range.to_range(self.index.len());
140
141        if range.is_empty() {
142            // Early return if the range is empty
143            return Drain { inner: self_ptr, iter: self.index.drain(0..0), text_erase_range: 0..0 };
144        }
145
146        let begin = self.index[range.start].start;
147        let end = self.index[range.end - 1].end;
148
149        // Subtract later element's offset before we process draining
150        let removed_text_len = end - begin;
151        self.index[range.end..].iter_mut().for_each(|x| {
152            x.start -= removed_text_len;
153            x.end -= removed_text_len;
154        });
155
156        let drain_iter = self.index.drain(range);
157        Drain { inner: self_ptr, iter: drain_iter, text_erase_range: begin..end }
158    }
159
160    pub fn into_string_sequence(self) -> StringSequence {
161        StringSequence::from_owned_index(self.index, &self.text)
162    }
163}
164
165/* ------------------------------------- Drain Iterator ------------------------------------- */
166
167pub struct Drain<'a> {
168    inner: *mut MutableStringSequence,
169    text_erase_range: std::ops::Range<u32>,
170    iter: std::vec::Drain<'a, std::ops::Range<u32>>,
171}
172
173impl<'a> Drop for Drain<'a> {
174    fn drop(&mut self) {
175        // SAFETY: We won't touch the `self.index` here, which is mutably borrowed for `iter`
176        unsafe { &mut *self.inner }.text.drain(up(self.text_erase_range.clone()));
177    }
178}
179
180impl<'a> Iterator for Drain<'a> {
181    type Item = &'a str;
182
183    fn next(&mut self) -> Option<Self::Item> {
184        // SAFETY: We won't touch the `self.index` here, which is mutably borrowed for `iter`
185        self.iter.next().map(|range| unsafe { retr(&(*self.inner).text, range) })
186    }
187
188    fn size_hint(&self) -> (usize, Option<usize>) {
189        self.iter.size_hint()
190    }
191}
192
193/* ------------------------------------------------------------------------------------------ */
194/*                                         CONVERSION                                         */
195/* ------------------------------------------------------------------------------------------ */
196
197impl<T: AsRef<str>> FromIterator<T> for MutableStringSequence {
198    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
199        let mut this = Self::default();
200        this.extend(iter);
201        this
202    }
203}
204
205impl MutableStringSequence {
206    pub fn from_slice(slice: &[impl AsRef<str>]) -> Self {
207        let mut this = Self::default();
208        this.extend_from_slice(slice);
209        this
210    }
211}
212
213impl From<String> for MutableStringSequence {
214    fn from(value: String) -> Self {
215        let unique_index = 0..value.len() as u32;
216        Self { index: [unique_index].into(), text: value.into_bytes() }
217    }
218}
219
220impl From<MutableStringSequence> for String {
221    fn from(value: MutableStringSequence) -> Self {
222        // SAFETY: We know that the buffer is strictly managed to be valid UTF-8 string.
223        unsafe { String::from_utf8_unchecked(value.text) }
224    }
225}
226
227impl<'a, T: StringSequenceView> From<&'a T> for MutableStringSequence {
228    fn from(value: &'a T) -> Self {
229        let (buffer, index) = value.inner();
230        Self { text: buffer.to_vec(), index: index.to_vec() }
231    }
232}
233
234impl From<StringSequence> for MutableStringSequence {
235    fn from(value: StringSequence) -> Self {
236        let (buffer, index) = value.inner();
237        Self { text: buffer.to_vec(), index: index.to_vec() }
238    }
239}
240
241impl From<MutableStringSequence> for StringSequence {
242    fn from(value: MutableStringSequence) -> Self {
243        Self::from_owned_index(value.index, &value.text)
244    }
245}
246
247impl From<MutableStringSequence> for SharedStringSequence {
248    fn from(value: MutableStringSequence) -> Self {
249        Self::from(StringSequence::from(value))
250    }
251}
252
253impl From<SharedStringSequence> for MutableStringSequence {
254    fn from(value: SharedStringSequence) -> Self {
255        Self::from(&value)
256    }
257}
258
259impl<'a, T: AsRef<str> + 'a> FromIterator<T> for StringSequence {
260    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
261        MutableStringSequence::from_iter(iter).into()
262    }
263}
264
265impl StringSequence {
266    pub fn from_slice(slice: &[impl AsRef<str>]) -> Self {
267        MutableStringSequence::from_slice(slice).into()
268    }
269}
270
271impl<'a, T: AsRef<str> + 'a> FromIterator<T> for SharedStringSequence {
272    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
273        MutableStringSequence::from_iter(iter).into()
274    }
275}
276
277impl SharedStringSequence {
278    pub fn from_slice(slice: &[impl AsRef<str>]) -> Self {
279        MutableStringSequence::from_slice(slice).into()
280    }
281}