/// A character cursor over a non-empty string.
///
/// The cursor always rests on a valid character: every movement is clamped to
/// `0..=max_position`, so reading the current character cannot go out of
/// bounds. One position can be saved as a "mark" and later revisited or used
/// as a slice boundary.
#[derive(Debug)]
pub struct Rlex {
    /// The original input, kept so `&str` slices can be handed out.
    source: String,
    /// `source` split into `char`s so positions are character indices.
    chars: Vec<char>,
    /// Current cursor position (character index).
    position: usize,
    /// Index of the last character, i.e. `chars.len() - 1`.
    max_position: usize,
    /// Position saved by [`Rlex::mark`]; starts at 0.
    marked_position: usize,
}

impl Rlex {
    /// Creates a lexer positioned on the first character of `source`.
    ///
    /// # Errors
    ///
    /// Returns an error message when `source` is empty, because the cursor
    /// must always have a character to rest on.
    pub fn new(source: &str) -> Result<Rlex, String> {
        if source.is_empty() {
            return Err("MALFORMED INPUT: rlex does not accept empty strings".to_owned());
        }
        let chars: Vec<char> = source.chars().collect();
        // Safe: `source` is non-empty, so there is at least one char.
        let max_position = chars.len() - 1;
        Ok(Rlex {
            source: source.to_owned(),
            chars,
            position: 0,
            max_position,
            marked_position: 0,
        })
    }

    /// Character index reached by moving `by` steps forward, clamped to the
    /// last character.
    fn index_ahead(&self, by: usize) -> usize {
        // `saturating_add` keeps huge step counts from overflowing before the clamp.
        self.position.saturating_add(by).min(self.max_position)
    }

    /// Character index reached by moving `by` steps backward, clamped to 0.
    fn index_behind(&self, by: usize) -> usize {
        self.position.saturating_sub(by)
    }

    /// Returns the part of `source` covering the characters `start..end`
    /// (character indices, `end` exclusive).
    fn slice_chars(&self, start: usize, end: usize) -> &str {
        /// Number of UTF-8 bytes needed to encode `chars`.
        fn utf8_len(chars: &[char]) -> usize {
            chars.iter().map(|c| c.len_utf8()).sum()
        }
        // Character indices must be converted to byte offsets before slicing
        // the `String`, since characters may be wider than one byte.
        let start_byte = utf8_len(&self.chars[..start]);
        let end_byte = start_byte + utf8_len(&self.chars[start..end]);
        &self.source[start_byte..end_byte]
    }

    /// Returns the current cursor position as a character index.
    pub fn pos(&self) -> usize {
        self.position
    }

    /// Moves one character forward; stays put when already on the last one.
    // The name mirrors `Iterator::next` but this is a cursor step, not an
    // iterator; renaming would break callers.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> &Rlex {
        self.position = self.index_ahead(1);
        self
    }

    /// Moves `by` characters forward, stopping at the last character.
    ///
    /// Runs in constant time regardless of `by`.
    pub fn next_by(&mut self, by: usize) -> &Rlex {
        self.position = self.index_ahead(by);
        self
    }

    /// Moves forward until the current character equals `search` or the last
    /// character is reached. Does not move if already on `search`.
    pub fn next_until(&mut self, search: char) -> &Rlex {
        while self.char() != search && !self.at_end() {
            self.next();
        }
        self
    }

    /// Returns `true` when the character after the cursor equals `check`.
    ///
    /// On the last character this compares against the last character itself.
    pub fn next_is(&mut self, check: char) -> bool {
        self.peek() == check
    }

    /// Returns `true` when the character `by` steps ahead (clamped to the last
    /// character) equals `check`.
    pub fn next_by_is(&mut self, check: char, by: usize) -> bool {
        self.peek_by(by) == check
    }

    /// Moves one character backward; stays put when already on the first one.
    pub fn prev(&mut self) -> &Rlex {
        self.position = self.index_behind(1);
        self
    }

    /// Moves `by` characters backward, stopping at the first character.
    ///
    /// Runs in constant time regardless of `by`.
    pub fn prev_by(&mut self, by: usize) -> &Rlex {
        self.position = self.index_behind(by);
        self
    }

    /// Moves backward until the current character equals `search` or the
    /// first character is reached. Does not move if already on `search`.
    pub fn prev_until(&mut self, search: char) -> &Rlex {
        while self.char() != search && !self.at_start() {
            self.prev();
        }
        self
    }

    /// Returns `true` when the character before the cursor equals `check`.
    ///
    /// On the first character this compares against the first character itself.
    pub fn prev_is(&mut self, check: char) -> bool {
        self.peek_back() == check
    }

    /// Returns `true` when the character `by` steps back (clamped to the first
    /// character) equals `check`.
    pub fn prev_by_is(&mut self, check: char, by: usize) -> bool {
        self.peek_back_by(by) == check
    }

    /// Returns the character under the cursor.
    pub fn char(&self) -> char {
        self.chars[self.position]
    }

    /// Returns `true` when the cursor is on the last character.
    pub fn at_end(&mut self) -> bool {
        self.position == self.max_position
    }

    /// Returns `true` when the cursor is on the first character.
    pub fn at_start(&mut self) -> bool {
        self.position == 0
    }

    /// Returns `true` when the cursor is on the marked position.
    pub fn at_mark(&mut self) -> bool {
        self.position == self.marked_position
    }

    /// Saves the current position as the mark.
    pub fn mark(&mut self) -> &Rlex {
        self.marked_position = self.position;
        self
    }

    /// Moves the cursor to `pos`, clamped to the last character.
    pub fn goto_pos(&mut self, pos: usize) -> &Rlex {
        self.position = pos.min(self.max_position);
        self
    }

    /// Moves the cursor to the marked position.
    pub fn goto_mark(&mut self) -> &Rlex {
        self.position = self.marked_position;
        self
    }

    /// Moves the cursor to the first character.
    pub fn goto_start(&mut self) -> &Rlex {
        self.position = 0;
        self
    }

    /// Moves the cursor to the last character.
    pub fn goto_end(&mut self) -> &Rlex {
        self.position = self.max_position;
        self
    }

    /// Returns the character after the cursor without moving; on the last
    /// character, returns that character.
    pub fn peek(&mut self) -> char {
        self.chars[self.index_ahead(1)]
    }

    /// Returns the character `by` steps ahead without moving, clamped to the
    /// last character.
    pub fn peek_by(&mut self, by: usize) -> char {
        self.chars[self.index_ahead(by)]
    }

    /// Returns the character before the cursor without moving; on the first
    /// character, returns that character.
    pub fn peek_back(&mut self) -> char {
        self.chars[self.index_behind(1)]
    }

    /// Returns the character `by` steps back without moving, clamped to the
    /// first character.
    pub fn peek_back_by(&mut self, by: usize) -> char {
        self.chars[self.index_behind(by)]
    }

    /// Returns the text between the mark and the cursor, inclusive at both
    /// ends, regardless of which of the two comes first.
    pub fn str_from_mark(&self) -> &str {
        let first = self.marked_position.min(self.position);
        let last = self.marked_position.max(self.position);
        self.slice_chars(first, last + 1)
    }

    /// Returns the text from the first character through the cursor
    /// (inclusive).
    pub fn str_from_start(&self) -> &str {
        self.slice_chars(0, self.position + 1)
    }

    /// Returns the text from the cursor (inclusive) through the last
    /// character.
    pub fn str_from_end(&self) -> &str {
        self.slice_chars(self.position, self.max_position + 1)
    }

    /// Returns `true` when, reading from the start through the cursor
    /// (inclusive), a `"` or `'` quote has been opened and not yet closed.
    ///
    /// A quote is closed only by the same delimiter that opened it, so the
    /// other kind of quote appearing inside (e.g. the apostrophe in
    /// `"it's"`) is treated as ordinary text. A backslash causes the
    /// character that follows it to be skipped.
    pub fn is_in_quote(&self) -> bool {
        let mut open_quote: Option<char> = None;
        let mut escaped = false;
        for &c in &self.chars[..=self.position] {
            if escaped {
                escaped = false;
                continue;
            }
            match c {
                '\\' => escaped = true,
                '"' | '\'' => {
                    open_quote = match open_quote {
                        None => Some(c),
                        Some(opener) if opener == c => None,
                        // A different delimiter inside an open quote is literal text.
                        still_open => still_open,
                    };
                }
                _ => {}
            }
        }
        open_quote.is_some()
    }
}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Construction must reject the empty string.
    #[test]
    fn test_empty_rlex_throws_error() {
        assert!(
            Rlex::new("").is_err(),
            "rlex should not accept empty strings"
        );
    }

    /// Single steps walk the input and clamp at both ends.
    #[test]
    fn test_rlex_next_and_prev() {
        let mut r = Rlex::new("abcd").unwrap();
        assert_eq!(r.char(), 'a');
        // Five steps forward over four characters: the last two must stay on 'd'.
        for expected in "bcddd".chars() {
            r.next();
            assert_eq!(r.char(), expected);
        }
        // Five steps back: the last two must stay on 'a'.
        for expected in "cbaaa".chars() {
            r.prev();
            assert_eq!(r.char(), expected);
        }
    }

    /// Walking to either boundary terminates and is reported by the predicates.
    #[test]
    fn test_rlex_at_start_and_at_end() {
        let mut r = Rlex::new("abcd").unwrap();
        loop {
            if r.at_end() {
                break;
            }
            r.next();
        }
        assert!(r.at_end());
        loop {
            if r.at_start() {
                break;
            }
            r.prev();
        }
        assert!(r.at_start());
    }

    /// Multi-step forward movement from the start, including overshoot.
    #[test]
    fn test_rlex_next_by() {
        let mut r = Rlex::new("abcd").unwrap();
        for (by, expected) in "abcdd".chars().enumerate() {
            r.goto_start();
            r.next_by(by);
            assert_eq!(r.char(), expected);
        }
    }

    /// Peeking one ahead, including from the last character.
    #[test]
    fn test_rlex_peek() {
        let mut r = Rlex::new("abcd").unwrap();
        assert_eq!(r.peek(), 'b');
        r.goto_end();
        assert_eq!(r.peek(), 'd');
    }

    /// Peeking several ahead from the start, including overshoot.
    #[test]
    fn test_rlex_peek_by() {
        let mut r = Rlex::new("abcd").unwrap();
        for (by, expected) in "abcdd".chars().enumerate() {
            assert_eq!(r.peek_by(by), expected);
        }
    }

    /// Peeking one back, including from the first character.
    #[test]
    fn test_rlex_peek_back() {
        let mut r = Rlex::new("abcd").unwrap();
        r.goto_end();
        assert_eq!(r.peek_back(), 'c');
        r.goto_start();
        assert_eq!(r.peek_back(), 'a');
    }

    /// Peeking several back from the end, including overshoot.
    #[test]
    fn test_rlex_peek_back_by() {
        let mut r = Rlex::new("abcd").unwrap();
        r.goto_end();
        for (by, expected) in "dcbaa".chars().enumerate() {
            assert_eq!(r.peek_back_by(by), expected);
        }
    }

    /// The three slicing helpers return the expected substrings.
    #[test]
    fn test_rlex_dump() {
        let mut r = Rlex::new("abcd").unwrap();
        r.next();
        assert_eq!(r.str_from_start(), "ab");
        r.goto_end();
        assert_eq!(r.str_from_start(), "abcd");

        // Mark 'c', then step onto 'd'.
        r.prev();
        r.mark();
        r.next();
        assert_eq!(r.str_from_mark(), "cd");

        r.goto_start();
        assert_eq!(r.str_from_end(), "abcd");
        for expected in &["bcd", "cd", "d"] {
            r.next();
            assert_eq!(r.str_from_end(), *expected);
        }
    }

    /// Quote detection for double-quoted, unquoted and single-quoted input.
    #[test]
    fn test_rlex_is_in_quote() {
        let mut quoted = Rlex::new("\"Hello, I am Quoted!\"").unwrap();
        while !quoted.at_end() {
            assert!(quoted.is_in_quote());
            quoted.next();
        }

        let mut plain = Rlex::new("Hello, I am not Quoted!").unwrap();
        while !plain.at_end() {
            assert!(!plain.is_in_quote());
            plain.next();
        }

        let mut attr = Rlex::new("<p name='bob'>").unwrap();
        attr.next_until('b');
        assert!(attr.is_in_quote());
    }

    /// Searching forward and backward lands on the requested character.
    #[test]
    fn test_rlex_next_until_and_prev_until() {
        let mut r = Rlex::new("abcd").unwrap();
        r.next_until('c');
        assert_eq!(r.pos(), 2);
        r.next();
        r.prev_until('b');
        assert_eq!(r.pos(), 1);
    }

    /// The `*_is` predicates agree with the corresponding peeks.
    #[test]
    fn test_rlex_surrounding_comparisons() {
        let mut r = Rlex::new("abcd").unwrap();
        assert!(r.next_is('b'));
        for (by, expected) in "abcdd".chars().enumerate() {
            assert!(r.next_by_is(expected, by));
        }
        r.goto_end();
        assert!(r.prev_is('c'));
        for (by, expected) in "dcbaa".chars().enumerate() {
            assert!(r.prev_by_is(expected, by));
        }
    }
}
422}