syntax_rs/
cursor.rs

1use std::fmt::Debug;
2
3use crate::utf8;
4
5// FIXME: Is this a good and fast implementation? We should probably change it to use an implementation of peeking like the Peekable iterator.
6// TODO: Add a new feature called u8_char_processing that will disable iterating over unicode codepoints and just iterate over the bytes.
7// TODO: Add a new feature called pre_char_processing that will read all the unicode chars before actually parsing.
8// TODO: Add a new feature called dynamic_char_processing that will dynamically read the chars just like we do here. vvvvvvvvvv
9
10// TODO: Make this more similar to std::io::Cursor by implementing Seek, Read and Write.
/// A small, copyable read position over a borrowed string.
///
/// The cursor never owns or modifies the text; it only tracks how far into `slice` reading has
/// progressed. Because it is `Copy`, a caller can snapshot it by value before a speculative
/// read and restore it afterwards.
#[derive(Copy, Clone)]
pub struct Cursor<'a> {
    /// The complete input text the cursor walks over.
    slice: &'a str,
    /// Byte offset into `slice` of the next code point to be read (starts at 0).
    index: usize,
}
16
17impl<'a> Cursor<'a> {
18    pub fn new(slice: &'a str) -> Cursor<'a> {
19        Cursor { slice, index: 0 }
20    }
21
22    #[inline]
23    pub fn is_empty(&self) -> bool {
24        self.index >= self.slice.len()
25    }
26
27    #[inline]
28    unsafe fn next_code_point_u32(&mut self) -> Option<u32> {
29        utf8::next_code_point(self.slice.as_bytes(), &mut self.index)
30    }
31
32    // TODO: Change this to use a Peekable-like implementation.
33    #[inline]
34    unsafe fn peek_code_point_u32(&self) -> Option<u32> {
35        utf8::peek_code_point(self.slice.as_bytes(), self.index)
36    }
37
38    // TODO: Do something like the above TODO and move this into another smaller struct. We will then handle warnings about continuation characters here.
39    #[inline]
40    pub fn peek0(&self) -> Option<char> {
41        unsafe {
42            self.peek_code_point_u32()
43                .map(|val| char::from_u32_unchecked(val))
44        }
45    }
46
47    pub fn peek_n(&self, n: usize) -> Option<&'a str> {
48        let begin = self.index;
49        let mut virtual_index = self.index;
50        for _ in 0..n {
51            unsafe {
52                utf8::next_code_point(self.slice.as_bytes(), &mut virtual_index)?;
53            }
54        }
55        Some(&self.slice[begin..virtual_index])
56    }
57
58    pub fn consume(&mut self, target: char) -> bool {
59        let mut virtual_index = self.index;
60        let c = unsafe { utf8::next_code_point(self.slice.as_bytes(), &mut virtual_index) };
61        match c {
62            Some(c) if unsafe { char::from_u32_unchecked(c) } == target => {
63                self.index = virtual_index;
64                true
65            }
66            _ => false,
67        }
68    }
69
70    #[inline]
71    pub fn advance(&mut self) -> Option<char> {
72        unsafe { Some(char::from_u32_unchecked(self.next_code_point_u32()?)) }
73    }
74
75    pub fn advance_n(&mut self, n: usize) -> Option<&'a str> {
76        let begin = self.index;
77        for _ in 0..n {
78            unsafe {
79                utf8::next_code_point(self.slice.as_bytes(), &mut self.index)?;
80            }
81        }
82        Some(&self.slice[begin..self.index])
83    }
84
85    pub fn advance_while(&mut self, mut pred: impl FnMut(char) -> bool) -> &'a str {
86        let begin = self.index;
87
88        let mut next_codepoint_index = self.index;
89        loop {
90            unsafe {
91                match utf8::next_code_point(self.slice.as_bytes(), &mut next_codepoint_index) {
92                    Some(code_point) if pred(char::from_u32_unchecked(code_point)) => {
93                        self.index = next_codepoint_index
94                    }
95                    _ => break,
96                }
97            }
98        }
99        &self.slice[begin..self.index]
100    }
101
102    #[inline]
103    pub fn index(&self) -> usize {
104        self.index
105    }
106
107    #[inline]
108    pub fn iter<'c>(&'c self) -> Iter<'a, 'c> {
109        Iter(self.index, self)
110    }
111}
112
113impl<'a> Debug for Cursor<'a> {
114    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115        struct LimitDotDot<'a>(&'a str, usize);
116
117        impl<'a> Debug for LimitDotDot<'a> {
118            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119                if self.0.len() > self.1 {
120                    write!(
121                        f,
122                        "{:?} and {} more..",
123                        &self.0[..self.1],
124                        self.0.len() - self.1
125                    )
126                } else {
127                    write!(f, "{:?}", self.0)
128                }
129            }
130        }
131
132        let slice = &self.slice[self.index..];
133        f.debug_tuple("Cursor")
134            .field(&LimitDotDot(slice, 16))
135            .finish()
136    }
137}
138
/// Iterator over the characters that remain in a [`Cursor`], created by [`Cursor::iter`].
///
/// Field `.0` is the iterator's own byte offset into the cursor's text and field `.1` is the
/// borrowed cursor. Iterating moves only `.0`; the cursor is held by shared reference and is
/// not advanced.
pub struct Iter<'a, 'b>(usize, &'b Cursor<'a>);
140
141impl<'a, 'b> Iterator for Iter<'a, 'b> {
142    type Item = char;
143
144    fn next(&mut self) -> Option<Self::Item> {
145        unsafe {
146            utf8::next_code_point(self.1.slice.as_bytes(), &mut self.0)
147                .map(|v| char::from_u32_unchecked(v))
148        }
149    }
150}