rlex/
lib.rs

/// A cursor-based lexer that can traverse, peek at, and slice characters out of a string source.
///
/// All positions are indices into the source's `char`s (Unicode scalar values), not byte
/// offsets. The cursor always rests on a valid character: every movement is clamped to the
/// range `0..=max_position`, so stepping past either end simply stays on the first or last
/// character.
#[derive(Debug)]
pub struct Rlex {
    /// The original text; the `str_from_*` methods return slices borrowed from it.
    source: String,
    /// `source` decoded into `char`s so the cursor can be indexed directly.
    chars: Vec<char>,
    /// Current cursor index into `chars`.
    position: usize,
    /// Index of the last character, i.e. `chars.len() - 1`.
    max_position: usize,
    /// Cursor index remembered by the latest `mark` call (0 until `mark` is called).
    marked_position: usize,
}

impl Rlex {
    /// Creates a lexer positioned on the first character of `source`.
    ///
    /// # Errors
    ///
    /// Returns an error message when `source` is empty, because the cursor must always
    /// rest on a character.
    pub fn new(source: &str) -> Result<Rlex, String> {
        if source.is_empty() {
            return Err("MALFORMED INPUT: rlex does not accept empty strings".to_owned());
        }
        let chars: Vec<char> = source.chars().collect();
        // Non-empty input yields at least one char, so this subtraction cannot underflow.
        let max_position = chars.len() - 1;
        Ok(Rlex {
            source: source.to_owned(),
            chars,
            position: 0,
            max_position,
            marked_position: 0,
        })
    }

    /// Returns the current cursor index (in chars).
    pub fn pos(&self) -> usize {
        self.position
    }

    /// Moves the cursor one character forward; does nothing when already on the last one.
    pub fn next(&mut self) -> &Rlex {
        if self.position < self.max_position {
            self.position += 1;
        }
        self
    }

    /// Moves the cursor `by` characters forward, stopping on the last character.
    pub fn next_by(&mut self, by: usize) -> &Rlex {
        self.position = self.index_ahead(by);
        self
    }

    /// Moves forward to the first occurrence of `search` at or after the cursor.
    ///
    /// If `search` does not occur, the cursor ends up on the last character.
    pub fn next_until(&mut self, search: char) -> &Rlex {
        let found = self.chars[self.position..]
            .iter()
            .position(|&c| c == search);
        self.position = match found {
            Some(offset) => self.position + offset,
            None => self.max_position,
        };
        self
    }

    /// Returns `true` when the character after the cursor (see [`Rlex::peek`]) equals `check`.
    pub fn next_is(&self, check: char) -> bool {
        self.peek() == check
    }

    /// Returns `true` when the character `by` steps ahead (see [`Rlex::peek_by`]) equals `check`.
    pub fn next_by_is(&self, check: char, by: usize) -> bool {
        self.peek_by(by) == check
    }

    /// Moves the cursor one character backward; does nothing when already on the first one.
    pub fn prev(&mut self) -> &Rlex {
        if self.position > 0 {
            self.position -= 1;
        }
        self
    }

    /// Moves the cursor `by` characters backward, stopping on the first character.
    pub fn prev_by(&mut self, by: usize) -> &Rlex {
        self.position = self.index_behind(by);
        self
    }

    /// Moves backward to the nearest occurrence of `search` at or before the cursor.
    ///
    /// If `search` does not occur, the cursor ends up on the first character.
    pub fn prev_until(&mut self, search: char) -> &Rlex {
        self.position = self.chars[..=self.position]
            .iter()
            .rposition(|&c| c == search)
            .unwrap_or(0);
        self
    }

    /// Returns `true` when the character before the cursor (see [`Rlex::peek_back`]) equals
    /// `check`.
    pub fn prev_is(&self, check: char) -> bool {
        self.peek_back() == check
    }

    /// Returns `true` when the character `by` steps back (see [`Rlex::peek_back_by`]) equals
    /// `check`.
    pub fn prev_by_is(&self, check: char, by: usize) -> bool {
        self.peek_back_by(by) == check
    }

    /// Returns the character under the cursor.
    pub fn char(&self) -> char {
        // `position` is kept within `0..=max_position` by every mutator, so this cannot panic.
        self.chars[self.position]
    }

    /// Returns `true` when the cursor is on the last character.
    pub fn at_end(&self) -> bool {
        self.position == self.max_position
    }

    /// Returns `true` when the cursor is on the first character.
    pub fn at_start(&self) -> bool {
        self.position == 0
    }

    /// Returns `true` when the cursor is on the marked position.
    pub fn at_mark(&self) -> bool {
        self.position == self.marked_position
    }

    /// Remembers the current cursor position for `goto_mark`, `at_mark` and `str_from_mark`.
    pub fn mark(&mut self) -> &Rlex {
        self.marked_position = self.position;
        self
    }

    /// Moves the cursor to `pos`, clamped to the last character.
    pub fn goto_pos(&mut self, pos: usize) -> &Rlex {
        self.position = pos.min(self.max_position);
        self
    }

    /// Moves the cursor back to the marked position.
    pub fn goto_mark(&mut self) -> &Rlex {
        self.position = self.marked_position;
        self
    }

    /// Moves the cursor to the first character.
    pub fn goto_start(&mut self) -> &Rlex {
        self.position = 0;
        self
    }

    /// Moves the cursor to the last character.
    pub fn goto_end(&mut self) -> &Rlex {
        self.position = self.max_position;
        self
    }

    /// Returns the character after the cursor without moving it.
    ///
    /// On the last character this returns that last character.
    pub fn peek(&self) -> char {
        self.chars[self.index_ahead(1)]
    }

    /// Returns the character `by` steps ahead without moving the cursor, clamped to the last
    /// character.
    pub fn peek_by(&self, by: usize) -> char {
        self.chars[self.index_ahead(by)]
    }

    /// Returns the character before the cursor without moving it.
    ///
    /// On the first character this returns that first character.
    pub fn peek_back(&self) -> char {
        self.chars[self.index_behind(1)]
    }

    /// Returns the character `by` steps back without moving the cursor, clamped to the first
    /// character.
    pub fn peek_back_by(&self, by: usize) -> char {
        self.chars[self.index_behind(by)]
    }

    /// Returns the text between the mark and the cursor, both ends inclusive.
    ///
    /// The order of mark and cursor does not matter.
    pub fn str_from_mark(&self) -> &str {
        let first = self.marked_position.min(self.position);
        let last = self.marked_position.max(self.position);
        self.slice_inclusive(first, last)
    }

    /// Returns the text from the first character through the cursor (inclusive).
    pub fn str_from_start(&self) -> &str {
        self.slice_inclusive(0, self.position)
    }

    /// Returns the text from the cursor (inclusive) through the last character.
    pub fn str_from_end(&self) -> &str {
        self.slice_inclusive(self.position, self.max_position)
    }

    /// Reports whether the cursor is inside a quoted region.
    ///
    /// The text from the first character through the cursor (inclusive) is scanned:
    ///
    /// * a backslash makes the following character literal;
    /// * `"` or `'` opens a region that only the *same* character closes, so the other
    ///   kind of quote inside it is ordinary text (e.g. the apostrophe in `"it's"`).
    ///
    /// Because the scan includes the character under the cursor, an opening quote counts
    /// as inside the region and the matching closing quote counts as outside.
    pub fn is_in_quote(&self) -> bool {
        let mut open_quote: Option<char> = None;
        let mut escaped = false;
        for &c in &self.chars[..=self.position] {
            if escaped {
                escaped = false;
                continue;
            }
            match c {
                '\\' => escaped = true,
                '"' | '\'' => match open_quote {
                    None => open_quote = Some(c),
                    Some(delimiter) if delimiter == c => open_quote = None,
                    // A different quote character inside an open region is plain text.
                    Some(_) => {}
                },
                _ => {}
            }
        }
        open_quote.is_some()
    }

    /// Index `by` characters after the cursor, clamped to the last character.
    fn index_ahead(&self, by: usize) -> usize {
        self.position.saturating_add(by).min(self.max_position)
    }

    /// Index `by` characters before the cursor, clamped to the first character.
    fn index_behind(&self, by: usize) -> usize {
        self.position.saturating_sub(by)
    }

    /// Borrows the part of `source` covering chars `first..=last`.
    fn slice_inclusive(&self, first: usize, last: usize) -> &str {
        // Char indices are translated to byte offsets by summing UTF-8 widths, which keeps
        // both ends of the slice on char boundaries.
        let start_byte: usize = self.chars[..first].iter().map(|c| c.len_utf8()).sum();
        let byte_len: usize = self.chars[first..=last].iter().map(|c| c.len_utf8()).sum();
        &self.source[start_byte..start_byte + byte_len]
    }
}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a lexer over `src`; every fixture used below is non-empty, so this must succeed.
    fn lexer(src: &str) -> Rlex {
        Rlex::new(src).unwrap()
    }

    /// Construction must reject the empty string.
    #[test]
    fn test_empty_rlex_throws_error() {
        let result = Rlex::new("");
        assert!(result.is_err(), "rlex should not accept empty strings");
    }

    /// Single steps walk the source and stick at both ends.
    #[test]
    fn test_rlex_next_and_prev() {
        let mut r = lexer("abcd");
        assert_eq!(r.char(), 'a');
        for expected in ['b', 'c', 'd', 'd', 'd'] {
            r.next();
            assert_eq!(r.char(), expected);
        }
        for expected in ['c', 'b', 'a', 'a', 'a'] {
            r.prev();
            assert_eq!(r.char(), expected);
        }
    }

    /// Stepping until `at_end` / `at_start` terminates at the respective boundary.
    #[test]
    fn test_rlex_at_start_and_at_end() {
        let mut r = lexer("abcd");
        while !r.at_end() {
            r.next();
        }
        assert!(r.at_end());
        while !r.at_start() {
            r.prev();
        }
        assert!(r.at_start());
    }

    /// Multi-step forward movement lands on the expected character and clamps at the end.
    #[test]
    fn test_rlex_next_by() {
        let mut r = lexer("abcd");
        r.next_by(0);
        assert_eq!(r.char(), 'a');
        for (by, expected) in [(1, 'b'), (2, 'c'), (3, 'd'), (4, 'd')] {
            r.goto_start();
            r.next_by(by);
            assert_eq!(r.char(), expected);
        }
    }

    /// Peeking one ahead sees the next character, or the last one when already at the end.
    #[test]
    fn test_rlex_peek() {
        let mut r = lexer("abcd");
        assert_eq!(r.peek(), 'b');
        r.goto_end();
        assert_eq!(r.peek(), 'd');
    }

    /// Peeking several ahead clamps at the last character.
    #[test]
    fn test_rlex_peek_by() {
        let mut r = lexer("abcd");
        for (by, expected) in [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (4, 'd')] {
            assert_eq!(r.peek_by(by), expected);
        }
    }

    /// Peeking one back sees the previous character, or the first one when at the start.
    #[test]
    fn test_rlex_peek_back() {
        let mut r = lexer("abcd");
        r.goto_end();
        assert_eq!(r.peek_back(), 'c');
        r.goto_start();
        assert_eq!(r.peek_back(), 'a');
    }

    /// Peeking several back clamps at the first character.
    #[test]
    fn test_rlex_peek_back_by() {
        let mut r = lexer("abcd");
        r.goto_end();
        for (by, expected) in [(0, 'd'), (1, 'c'), (2, 'b'), (3, 'a'), (4, 'a')] {
            assert_eq!(r.peek_back_by(by), expected);
        }
    }

    /// The three slicing helpers return the expected inclusive ranges.
    #[test]
    fn test_rlex_dump() {
        let mut r = lexer("abcd");
        r.next();
        assert_eq!(r.str_from_start(), "ab");
        r.goto_end();
        assert_eq!(r.str_from_start(), "abcd");

        r.prev();
        r.mark();
        r.next();
        assert_eq!(r.str_from_mark(), "cd");

        r.goto_start();
        assert_eq!(r.str_from_end(), "abcd");
        for expected in ["bcd", "cd", "d"] {
            r.next();
            assert_eq!(r.str_from_end(), expected);
        }
    }

    /// Quote detection for fully quoted, unquoted, and attribute-style input.
    #[test]
    fn test_rlex_is_in_quote() {
        let mut quoted = lexer("\"Hello, I am Quoted!\"");
        while !quoted.at_end() {
            assert!(quoted.is_in_quote());
            quoted.next();
        }

        let mut plain = lexer("Hello, I am not Quoted!");
        while !plain.at_end() {
            assert!(!plain.is_in_quote());
            plain.next();
        }

        let mut tag = lexer("<p name='bob'>");
        tag.next_until('b');
        assert!(tag.is_in_quote());
    }

    /// Searching forward and backward stops on the matching character.
    #[test]
    fn test_rlex_next_until_and_prev_until() {
        let mut r = lexer("abcd");
        r.next_until('c');
        assert_eq!(r.pos(), 2);
        r.next();
        r.prev_until('b');
        assert_eq!(r.pos(), 1);
    }

    /// The `*_is` predicates agree with the corresponding peeks, including clamping.
    #[test]
    fn test_rlex_surrounding_comparisons() {
        let mut r = lexer("abcd");
        assert!(r.next_is('b'));
        for (by, expected) in [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (4, 'd')] {
            assert!(r.next_by_is(expected, by));
        }
        r.goto_end();
        assert!(r.prev_is('c'));
        for (by, expected) in [(0, 'd'), (1, 'c'), (2, 'b'), (3, 'a'), (4, 'a')] {
            assert!(r.prev_by_is(expected, by));
        }
    }
}