Skip to main content

emmylua_parser/text/
reader.rs

1use super::text_range::SourceRange;
2use std::str::Chars;
3
/// Sentinel character used to signal "no character here" (end of input).
///
/// Because this is a real `char` value (NUL), a literal `'\0'` in the input
/// cannot be told apart from the end of input by code comparing against it.
pub const EOF: char = '\0';
5
6/// Reader with look-ahead and look-behind methods.
7///
8/// As you read text, the part that you've read is accumulated
9/// in `current_range`. The part that you haven't seen yet is available
10/// in `tail_range`:
11///
12/// ```text
13/// valid range: a b c d e f g
14///                ^^^          - current range
15///                    ^^^^^^^  - tail range
16///                  ^          - prev char
17///                    ^        - current char
18///                      ^      - next char
19/// ```
20///
21/// Once you call `reset_buff`, current range is advanced to start
22/// at the current char, and shrunk to zero length:
23///
24/// ```text
25/// valid range: a b c d e f g
26///                    .       - current range (empty, starts at `d`)
27///                    ^^^^^^  - tail range
28///                  ^         - prev char
29///                    ^       - current char
30///                      ^     - next char
31/// ```
32///
33/// The workflow in roughly this:
34///
35/// - you read characters, they're put into `saved_range`;
36/// - once you're at a token boundary, you emit a token with `saved_range`,
37///   then call `reset_buff`,
38/// - you continue onto the next token.
39#[derive(Debug, Clone)]
40pub struct Reader<'a> {
41    text: &'a str,
42    valid_range: SourceRange,
43    chars: Chars<'a>,
44    current_buffer_byte_pos: usize,
45    current_buffer_byte_len: usize,
46    next: char,
47    current: char,
48    prev: char,
49}
50
51impl<'a> Reader<'a> {
52    pub fn new(text: &'a str) -> Self {
53        Self::new_with_range(text, SourceRange::new(0, text.len()))
54    }
55
56    pub fn new_with_range(text: &'a str, range: SourceRange) -> Self {
57        assert_eq!(text.len(), range.length);
58        let mut res = Self {
59            text,
60            valid_range: range,
61            chars: text.chars(),
62            current_buffer_byte_pos: 0,
63            current_buffer_byte_len: 0,
64            next: EOF,
65            current: EOF,
66            prev: EOF,
67        };
68
69        res.current = res.chars.next().unwrap_or(EOF);
70        res.next = res.chars.next().unwrap_or(EOF);
71
72        res
73    }
74
75    pub fn bump(&mut self) {
76        if self.current != EOF {
77            self.current_buffer_byte_len += self.current.len_utf8();
78            self.prev = self.current;
79            self.current = self.next;
80            self.next = self.chars.next().unwrap_or(EOF);
81        }
82    }
83
84    pub fn reset_buff(&mut self) {
85        self.current_buffer_byte_pos += self.current_buffer_byte_len;
86        self.current_buffer_byte_len = 0;
87    }
88
89    pub fn reset_buff_into_sub_reader(&mut self) -> Reader<'a> {
90        let mut reader = Reader::new_with_range(self.current_text(), self.current_range());
91        if let Some(prev) = &self.text[..self.current_buffer_byte_pos]
92            .chars()
93            .next_back()
94        {
95            reader.prev = *prev;
96        }
97        self.reset_buff();
98        reader
99    }
100
101    pub fn is_eof(&self) -> bool {
102        self.current == EOF
103    }
104
105    pub fn is_start_of_line(&self) -> bool {
106        self.current_buffer_byte_pos == 0
107    }
108
109    pub fn prev_char(&self) -> char {
110        self.prev
111    }
112
113    pub fn current_char(&self) -> char {
114        self.current
115    }
116
117    pub fn next_char(&mut self) -> char {
118        self.next
119    }
120
121    pub fn current_range(&self) -> SourceRange {
122        SourceRange::new(
123            self.valid_range.start_offset + self.current_buffer_byte_pos,
124            self.current_buffer_byte_len,
125        )
126    }
127
128    pub fn tail_range(&self) -> SourceRange {
129        self.valid_range
130            .moved(self.current_buffer_byte_pos + self.current_buffer_byte_len)
131    }
132
133    pub fn current_text(&self) -> &'a str {
134        &self.text[self.current_buffer_byte_pos
135            ..(self.current_buffer_byte_pos + self.current_buffer_byte_len)]
136    }
137
138    pub fn tail_text(&self) -> &'a str {
139        &self.text[self.current_buffer_byte_pos + self.current_buffer_byte_len..]
140    }
141
142    pub fn eat_when(&mut self, ch: char) -> usize {
143        let mut count = 0;
144        while !self.is_eof() && self.current_char() == ch {
145            count += 1;
146            self.bump();
147        }
148        count
149    }
150
151    pub fn consume_char_n_times(&mut self, ch: char, count: usize) -> usize {
152        let mut eaten = 0;
153        while !self.is_eof() && self.current_char() == ch && eaten < count {
154            eaten += 1;
155            self.bump();
156        }
157        eaten
158    }
159
160    pub fn consume_n_times<F>(&mut self, func: F, count: usize) -> usize
161    where
162        F: Fn(char) -> bool,
163    {
164        let mut eaten = 0;
165        while !self.is_eof() && func(self.current_char()) && eaten < count {
166            eaten += 1;
167            self.bump();
168        }
169        eaten
170    }
171
172    pub fn eat_while<F>(&mut self, func: F) -> usize
173    where
174        F: Fn(char) -> bool,
175    {
176        let mut count = 0;
177        while !self.is_eof() && func(self.current_char()) {
178            count += 1;
179            self.bump();
180        }
181        count
182    }
183
184    pub fn eat_till_end(&mut self) -> usize {
185        self.eat_while(|_| true)
186    }
187
188    pub fn get_source_text(&self) -> &'a str {
189        self.text
190    }
191
192    pub fn get_current_end_pos(&self) -> usize {
193        self.current_buffer_byte_pos + self.current_buffer_byte_len
194    }
195}
196
/// Unit tests for `Reader`: construction, advancing, buffer resets,
/// look-ahead, range/text accounting and the `eat_*` helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh reader is positioned on the first character.
    #[test]
    fn test_new_reader() {
        let text = "Hello, world!";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        assert_eq!(reader.current_char(), 'H');
    }

    /// `bump` moves to the following character.
    #[test]
    fn test_bump() {
        let text = "Hello, world!";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        reader.bump();
        assert_eq!(reader.current_char(), 'e');
    }

    /// Resetting the buffer keeps the position but moves the buffer start.
    #[test]
    fn test_reset_buff() {
        let text = "Hello, world!";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        reader.bump();
        reader.reset_buff();
        assert_eq!(reader.current_char(), 'e');
        assert!(!reader.is_start_of_line());
        assert!(!reader.is_eof());
    }

    /// The reader reports EOF once the only character has been consumed.
    #[test]
    fn test_is_eof() {
        let text = "H";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        assert!(!reader.is_eof());
        reader.bump();
        assert!(reader.is_eof());
    }

    /// `next_char` looks one character past the current one.
    #[test]
    fn test_next_char() {
        let text = "Hello, world!";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        assert_eq!(reader.next_char(), 'e');
    }

    /// `current_range` follows the buffer across resets.
    #[test]
    fn test_saved_range() {
        let text = "Hello, world!";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        reader.bump();
        let range = reader.current_range();
        assert_eq!(range.start_offset, 0);
        assert_eq!(range.length, 1);

        reader.reset_buff();
        reader.bump();
        let range2 = reader.current_range();
        assert_eq!(range2.start_offset, 1);
        assert_eq!(range2.length, 1);
    }

    /// `current_text` returns the characters consumed since the last reset.
    #[test]
    fn test_current_saved_text() {
        let text = "Hello, world!";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        reader.bump();
        assert_eq!(reader.current_text(), "H");
    }

    /// `eat_when` consumes a run of one specific character.
    #[test]
    fn test_eat_when() {
        let text = "aaaHello, world!";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        let count = reader.eat_when('a');
        assert_eq!(count, 3);
        assert_eq!(reader.current_char(), 'H');
        assert_eq!(reader.current_text(), "aaa");
    }

    /// `eat_while` consumes a run of characters accepted by the predicate.
    #[test]
    fn test_eat_while() {
        let text = "12345Hello, world!";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        let count = reader.eat_while(|c| c.is_ascii_digit());
        assert_eq!(count, 5);
        assert_eq!(reader.current_char(), 'H');
    }

    /// Buffer lengths are counted in bytes, not characters.
    #[test]
    fn test_multibyte_range() {
        let text = "héllo";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        reader.bump();
        reader.bump();
        assert_eq!(reader.current_text(), "hé");
        let range = reader.current_range();
        assert_eq!(range.start_offset, 0);
        assert_eq!(range.length, 3);
        assert_eq!(reader.tail_text(), "llo");
        assert_eq!(reader.get_current_end_pos(), 3);
    }

    /// A sub-reader covers the current buffer and remembers the character
    /// that preceded it in the parent text.
    #[test]
    fn test_reset_buff_into_sub_reader() {
        let text = "ab cd";
        let mut reader = Reader::new(text);
        reader.reset_buff();
        assert_eq!(reader.eat_while(|c| c != ' '), 2);

        let first = reader.reset_buff_into_sub_reader();
        assert_eq!(first.get_source_text(), "ab");
        assert_eq!(first.current_char(), 'a');
        assert_eq!(first.prev_char(), EOF);

        assert_eq!(reader.eat_when(' '), 1);
        let second = reader.reset_buff_into_sub_reader();
        assert_eq!(second.get_source_text(), " ");
        assert_eq!(second.prev_char(), 'b');
        let range = second.current_range();
        assert_eq!(range.start_offset, 2);
        assert_eq!(range.length, 0);

        assert_eq!(reader.current_text(), "");
        assert_eq!(reader.tail_text(), "cd");
    }
}