// cursor/cursors/extensions/string.rs

// Copyright 2021 Hwakyeom Kim(=just-do-halee)
2
3use super::*;
4
5#[derive(PartialEq, Eq, Clone)]
6pub struct StrCursor<'s, E: Extras<char> = NoneExtras<char>> {
7    cursor: Cursor<'s, u8, NoneExtras<u8>>,
8    len: Option<usize>,
9    info: StrCursorInfo<E>,
10    saved_info: StrCursorInfo<E>,
11}
12
13impl<E: Extras<char>> fmt::Debug for StrCursor<'_, E> {
14    #[inline]
15    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
16        f.debug_tuple("StrCursor")
17            .field(&self.as_preserved_str())
18            .field(&self.current())
19            .field(&self.as_remaining_str())
20            .finish()
21    }
22}
23
24impl<E: Extras<char>> ToExtras<E> for StrCursor<'_, E> {
25    type Input = char;
26    #[inline]
27    fn to_extras(&self) -> E {
28        self.info.extras.clone()
29    }
30}
31
32/// this will reset the newer cursor
33impl<E: Extras<char>> ToCursor<u8> for StrCursor<'_, E> {}
34
35impl<E: Extras<char>> AsRef<[u8]> for StrCursor<'_, E> {
36    #[inline]
37    fn as_ref(&self) -> &[u8] {
38        self.as_bytes()
39    }
40}
41
42impl<'s, E: Extras<char>> AsRef<Self> for StrCursor<'s, E> {
43    #[inline]
44    fn as_ref(&self) -> &Self {
45        self
46    }
47}
48
49impl<'s, E: Extras<char>> AsMut<Self> for StrCursor<'s, E> {
50    #[inline]
51    fn as_mut(&mut self) -> &mut Self {
52        self
53    }
54}
55
56#[inline]
57fn str_cursor_new<EXTRAS: Extras<char>>(string: &str) -> StrCursor<EXTRAS> {
58    StrCursor {
59        cursor: Cursor::new(string.as_bytes()),
60        len: None,
61        info: StrCursorInfo::new(),
62        saved_info: StrCursorInfo::new(),
63    }
64}
65
66impl<'s> StrCursor<'s, NoneExtras<char>> {
67    #[inline]
68    pub fn new(string: &'s str) -> Self {
69        str_cursor_new(string)
70    }
71    #[inline]
72    pub fn new_with_extras<EXTRAS: Extras<char>>(string: &str) -> StrCursor<EXTRAS> {
73        str_cursor_new(string)
74    }
75}
76
77impl<'s, E: Extras<char>> StrCursor<'s, E> {
78    // ------ private ------
79    #[inline]
80    fn set_current(&mut self, val: char) {
81        self.info.current = val;
82    }
83    /// * WARNING: directly sets backwards. no effects.
84    #[inline]
85    pub fn unsafe_set_backwards(&mut self, new_backwards: bool) {
86        *self.cursor.backwards_mut() = new_backwards;
87    }
88    #[inline]
89    pub fn set_backwards(&mut self, new_backwards: bool) {
90        if self.backwards() != new_backwards {
91            self.load_char_start_pos();
92        }
93        *self.cursor.backwards_mut() = new_backwards;
94    }
95    #[inline]
96    fn set_char_start_pos(&mut self, val: usize) {
97        self.info.char_start_pos = val;
98    }
99    /// load code point
100    #[inline]
101    fn load_char_start_pos(&mut self) {
102        if self.pos_as_bytes() != self.char_start_pos() {
103            self.cursor.unsafe_set_pos(self.char_start_pos());
104        }
105    }
106    /// purely sets position + blushes extras. returns current().
107    #[inline]
108    fn set_pos(&mut self, new_pos: usize) -> Option<char> {
109        if new_pos != self.pos() {
110            self.info.pos = new_pos;
111            self.blush_extras();
112        }
113
114        Some(self.current())
115    }
116    /// * WARNING: directly sets byte position. no effects.
117    #[inline]
118    fn unsafe_set_pos_as_bytes(&mut self, new_pos: usize) {
119        self.cursor.unsafe_set_pos(new_pos);
120    }
121    #[inline]
122    fn blush_extras(&mut self) {
123        if !self.noeffects() {
124            self.info.extras.change(&self.current(), self.pos());
125        }
126    }
127
128    #[inline]
129    fn jump_to_added(&mut self, rhs: usize) -> Option<char> {
130        self.jump_to_offset(rhs as isize)
131    }
132    #[inline]
133    fn jump_to_subed(&mut self, rhs: usize) -> Option<char> {
134        self.jump_to_offset(-(rhs as isize))
135    }
136
137    // ------ public ------
138    #[inline]
139    pub fn unwrapped_next(&mut self) -> char {
140        self.next().unwrap()
141    }
142}
143
144impl<'s, E: Extras<char>> Iterator for StrCursor<'s, E> {
145    type Item = char;
146    #[inline]
147    fn next(&mut self) -> Option<Self::Item> {
148        // 1 byte next and then scanning char
149        // save the first code point pos
150        self.cursor.next()?;
151        let byte_pos = self.pos_as_bytes();
152        let (ch, pos) = match self.backwards() {
153            false => {
154                let ch = utf::next_char(&mut self.cursor)?;
155                (ch, self.pos().checked_add(1)?)
156            }
157            true => {
158                let ch = utf::next_back_char(&mut self.cursor)?;
159                (ch, self.pos().checked_sub(1)?)
160            }
161        };
162        self.set_char_start_pos(byte_pos);
163        if self.is_init() {
164            self.set_current(ch);
165            self.set_pos(pos)
166        } else {
167            self.set_current(ch);
168            self.blush_extras();
169            Some(self.current())
170        }
171    }
172}
173
174impl<'s, E: Extras<char>> StrCursorTrait<'s, E> for StrCursor<'s, E> {
175    #[inline]
176    fn is_init(&self) -> bool {
177        self.current() != EOF_CHAR
178    }
179    /// if `next` or `jump` can effect the [`Extras`](Extras).
180    #[inline]
181    fn noeffects(&self) -> bool {
182        self.info.noeffects
183    }
184    #[inline]
185    fn noeffects_mut(&mut self) -> &mut bool {
186        &mut self.info.noeffects
187    }
188    #[inline]
189    fn backwards(&self) -> bool {
190        self.cursor.backwards()
191    }
192    #[inline]
193    fn turnaround(&mut self) {
194        self.load_char_start_pos();
195        self.cursor.turnaround();
196    }
197    #[inline]
198    fn pos(&self) -> usize {
199        self.info.pos
200    }
201    #[inline]
202    fn pos_as_bytes(&self) -> usize {
203        self.cursor.pos()
204    }
205    #[inline]
206    fn char_start_pos(&self) -> usize {
207        self.info.char_start_pos
208    }
209    /// if you've never tried it before,
210    /// this method will create remainder iterator and then
211    /// consume it to count number of chars.
212    #[inline]
213    fn len(&mut self) -> usize {
214        if let Some(n) = self.len {
215            n
216        } else {
217            let offset = self
218                .as_right_side_bytes()
219                .iter()
220                .filter(|&&byte| !utf::utf8_is_cont_byte(byte))
221                .count();
222            self.len = Some(self.pos() + offset + 1);
223            self.len()
224        }
225    }
226    #[inline]
227    fn is_len(&self) -> bool {
228        self.len.is_some()
229    }
230    #[inline]
231    fn len_as_bytes(&self) -> usize {
232        self.cursor.len()
233    }
234    #[inline]
235    fn is_empty(&self) -> bool {
236        self.as_bytes().len() == 0
237    }
238    #[inline]
239    fn as_bytes(&self) -> &'s [u8] {
240        self.cursor.as_slice()
241    }
242    #[inline]
243    fn extras(&self) -> &E {
244        &self.info.extras
245    }
246    #[inline]
247    fn extras_mut(&mut self) -> &mut E {
248        &mut self.info.extras
249    }
250    #[inline]
251    fn current(&self) -> char {
252        self.info.current
253    }
254    #[inline]
255    fn reset(&mut self) {
256        self.info.reset();
257        self.cursor.reset();
258    }
259    #[inline]
260    fn save(&mut self) {
261        // sets inner info at the first-time
262        self.info.inner = self.cursor.info.clone();
263        self.saved_info = self.info.clone();
264    }
265    #[inline]
266    fn saved(&self) -> &StrCursorInfo<E> {
267        &self.saved_info
268    }
269    #[inline]
270    fn load(&mut self) {
271        self.info = self.saved_info.clone();
272    }
273    #[inline]
274    fn jump_to_last(&mut self) -> char {
275        let last_pos = self.len().saturating_sub(1);
276        self.jump(last_pos).unwrap()
277    }
278    /// - if you had tried `len` before,
279    /// - this method does more performance.
280    /// * *[inline function]*
281    #[inline]
282    fn jump(&mut self, pos: usize) -> Option<char> {
283        let ch = match pos {
284            _ if self.is_init() && pos == self.pos() => return Some(self.current()),
285            0 => {
286                self.unsafe_set_pos_as_bytes(0);
287                let ch = utf::next_char(&mut self.cursor)?;
288                if self.backwards() {
289                    self.set_char_start_pos(self.pos_as_bytes());
290                    self.unsafe_set_pos_as_bytes(0);
291                } else {
292                    self.set_char_start_pos(0);
293                }
294                ch
295            }
296            _ if matches!(self.len, Some(len) if len.saturating_sub(1) == pos) => {
297                let byte_last_pos = self.len_as_bytes().saturating_sub(1);
298                self.unsafe_set_pos_as_bytes(byte_last_pos);
299                let ch = utf::next_back_char(&mut self.cursor)?;
300                if self.backwards() {
301                    self.set_char_start_pos(byte_last_pos);
302                } else {
303                    self.set_char_start_pos(self.pos_as_bytes());
304                    self.unsafe_set_pos_as_bytes(byte_last_pos);
305                }
306                ch
307            }
308            _ => {
309                // =-=-=-=-=-=-=-=-=-=-=-=-=-=
310                let (dist, is_dist) = self.pos().detailed_diff(pos);
311                let new_byte_pos = match is_dist {
312                    Ordering::Greater => {
313                        let (offset, _) = self
314                            .cursor
315                            .as_right_side_slice()
316                            .iter()
317                            .enumerate()
318                            .filter(|(_, &byte)| !utf::utf8_is_cont_byte(byte))
319                            .take(dist)
320                            .last()?;
321                        self.cursor.pos() + offset + 1
322                    }
323                    Ordering::Equal => return Some(self.current()),
324                    Ordering::Less => {
325                        let (offset, _) = self
326                            .cursor
327                            .as_left_side_slice_include_current()
328                            .iter()
329                            .rev()
330                            .enumerate()
331                            .filter(|(_, &byte)| !utf::utf8_is_cont_byte(byte))
332                            .take(dist + 1)
333                            .last()?;
334                        self.cursor.pos() - offset
335                    }
336                };
337                self.unsafe_set_pos_as_bytes(new_byte_pos);
338
339                // =-=-=-=-=-=-=-=-=-=-=-=-=-=
340                if !self.cursor.is_init() {
341                    self.cursor.set_init(true);
342                }
343                let ch;
344                if self.backwards() {
345                    ch = utf::next_char(&mut self.cursor)?;
346                    self.set_char_start_pos(self.cursor.pos());
347                    self.unsafe_set_pos_as_bytes(new_byte_pos);
348                } else {
349                    self.set_char_start_pos(new_byte_pos);
350                    ch = utf::next_char(&mut self.cursor)?;
351                }
352                if self.pos_as_bytes() == self.len_as_bytes().saturating_sub(1) {
353                    self.len = Some(pos); // sets length
354                }
355                ch
356            }
357        };
358        self.set_current(ch);
359        self.set_pos(pos)
360    }
361}
362
// ------- WARNING: the `usize` operand of these operators is converted to `isize` -------
364
365impl<'s, E: Extras<char>> AddAssign<usize> for StrCursor<'s, E> {
366    #[inline]
367    fn add_assign(&mut self, rhs: usize) {
368        self.jump_to_added(rhs);
369    }
370}
371
372impl<'s, E: Extras<char>> Add<usize> for StrCursor<'s, E> {
373    type Output = Option<char>;
374    #[inline]
375    fn add(mut self, rhs: usize) -> Self::Output {
376        self.jump_to_added(rhs)
377    }
378}
379
380impl<'s, E: Extras<char>> Add<usize> for &mut StrCursor<'s, E> {
381    type Output = Option<char>;
382    #[inline]
383    fn add(self, rhs: usize) -> Self::Output {
384        self.jump_to_added(rhs)
385    }
386}
387
388impl<'s, E: Extras<char>> SubAssign<usize> for StrCursor<'s, E> {
389    #[inline]
390    fn sub_assign(&mut self, rhs: usize) {
391        self.jump_to_subed(rhs);
392    }
393}
394
395impl<'s, E: Extras<char>> Sub<usize> for StrCursor<'s, E> {
396    type Output = Option<char>;
397    #[inline]
398    fn sub(mut self, rhs: usize) -> Self::Output {
399        self.jump_to_subed(rhs)
400    }
401}
402
403impl<'s, E: Extras<char>> Sub<usize> for &mut StrCursor<'s, E> {
404    type Output = Option<char>;
405    #[inline]
406    fn sub(self, rhs: usize) -> Self::Output {
407        self.jump_to_subed(rhs)
408    }
409}