1pub use regex::Regex;
2
/// Character-based cursor over an input string.
///
/// The input is stored as individual `char`s, so `cursor` counts characters,
/// not bytes.
#[derive(PartialEq, Debug)]
pub struct Scanner {
    /// Index into `characters` of the next character to be read.
    cursor: usize,
    /// The complete input, decoded into `char`s.
    characters: Vec<char>,
}
8
9#[derive(PartialEq, Debug)]
10pub enum ScanError {
11 EndOfLine, InvalidRegexCaptureConversion, }
14
15impl From<regex::Error> for ScanError {
16 fn from(_err: regex::Error) -> ScanError {
17 ScanError::InvalidRegexCaptureConversion
18 }
19}
20
/// Snapshot of a scanner position that can be stored and later restored.
///
/// Converts into `usize`, so it can be passed to `Scanner::set_pos`.
#[derive(Eq, Debug, Clone)]
pub struct ScannerPos {
    /// Character index captured from the scanner's cursor.
    pub cursor: usize,
}
25
26impl From<ScannerPos> for usize {
27 fn from(value: ScannerPos) -> Self {
28 value.cursor
29 }
30}
31
32impl PartialEq for ScannerPos {
33 fn eq(&self, other: &Self) -> bool {
34 self.cursor == other.cursor
35 }
36}
37
38impl PartialOrd for ScannerPos {
39 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
40 Some(self.cmp(other))
41 }
42}
43impl Ord for ScannerPos {
44 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
45 self.cursor.cmp(&other.cursor)
46 }
47}
48
/// Location and text excerpt describing where a scan problem occurred.
///
/// Built by `Scanner::get_error_context`.
pub struct ErrorContext {
    /// Excerpt of the input related to the reported position.
    pub context: String,
    /// Number of `'\n'` characters preceding the reported position, so the
    /// first line is `0`.
    pub line: u32,
    /// Offset of the reported position within its line, as computed by
    /// `Scanner::get_error_context`.
    pub column: u32,
}
54
/// Line-oriented view over a borrowed slice of characters.
///
/// It keeps its own cursor, separate from the `Scanner` it was created from.
#[derive(Debug)]
pub struct LineIterator<'a> {
    /// Index into `characters` at which the next line starts.
    cursor: usize,
    /// The characters being iterated; borrowed and never modified.
    characters: &'a [char],
}
60
61impl<'a> LineIterator<'a> {
62 pub fn take_while_peek<P>(&self, predicate: P) -> (Vec<String>, usize)
63 where
64 Self: Sized,
65 P: Fn(&str) -> bool,
66 {
67 let len = self.characters.len();
68 let mut cursor = self.cursor;
69 let mut peek_cursor = self.cursor;
70
71 let mut lines: Vec<String> = Vec::new();
72
73 loop {
74 if peek_cursor >= len {
75 break;
76 }
77 if self.characters[peek_cursor] == '\n' {
78 let line = &self.characters[cursor..peek_cursor];
79 let line = line.iter().collect::<String>();
80 if !predicate(&line) {
81 return (lines, cursor);
82 }
83 lines.push(line);
84 cursor = peek_cursor + 1;
85 }
86
87 peek_cursor += 1;
88 }
89
90 (lines, cursor)
91 }
92}
93
94impl<'a> Iterator for LineIterator<'a> {
95 type Item = String;
96 fn next(&mut self) -> Option<Self::Item> {
97 let len: usize = self.characters.len();
98 if self.cursor >= len {
99 return None;
100 }
101 let mut peek_cursor: usize = self.cursor;
102 loop {
103 if peek_cursor >= len || self.characters[peek_cursor] == '\n' {
104 let result = self.characters[self.cursor..peek_cursor]
105 .iter()
106 .collect::<String>();
107 self.cursor = peek_cursor;
108 return Some(result);
109 }
110 peek_cursor += 1;
111 }
112 }
113}
114
/// Characters treated as whitespace by `Scanner::skip_ws` and
/// `Scanner::peek_skip_ws`: space, tab, carriage return and form feed.
/// `'\n'` is not part of the set.
pub const WS_CHARS: [char; 4] = [' ', '\t', '\r', '\u{000C}'];
117
118impl Scanner {
119 pub fn new(string: &str) -> Scanner {
120 Scanner {
121 cursor: 0,
122 characters: string.chars().collect(),
123 }
124 }
125
126 pub fn iter_at_pos(&mut self) -> LineIterator {
127 LineIterator {
128 characters: &self.characters[..],
129 cursor: self.cursor,
130 }
131 }
132
133 pub fn set_pos<T: Into<usize>>(&mut self, position: T) {
134 self.cursor = position.into();
135 }
136
137 pub fn get_pos(&self) -> ScannerPos {
138 ScannerPos {
139 cursor: self.cursor,
140 }
141 }
142
143 pub fn get_cursor(&self) -> usize {
144 self.cursor
145 }
146
147 pub fn get_error_context(&self, start_pos: usize, end_pos: Option<usize>) -> ErrorContext {
148 let mut line = 0;
149 let mut last_newline_pos = 0;
150 for (index, char) in self.characters[..start_pos].iter().enumerate() {
151 if char == &'\n' {
152 line += 1;
153 last_newline_pos = index;
154 }
155 }
156
157 let column = start_pos - last_newline_pos;
158
159 let context = if let Some(end_pos) = end_pos {
160 self.characters[start_pos..end_pos]
161 .iter()
162 .collect::<String>()
163 } else {
164 self.characters[last_newline_pos..start_pos]
165 .iter()
166 .collect::<String>()
167 };
168
169 ErrorContext {
170 line,
171 column: column as u32,
172 context,
173 }
174 }
175
176 pub fn get_from_to<S: Into<usize>, E: Into<usize>>(&self, start: S, end: E) -> String {
177 let start: usize = start.into();
178 let end = end.into();
179 if start == end {
180 return String::new();
181 }
182 self.characters[start..end].iter().collect::<String>()
183 }
184
185 pub fn peek(&self) -> Option<&char> {
187 self.characters.get(self.cursor)
188 }
189
190 pub fn peek_n(&self, num: usize) -> Option<Vec<char>> {
192 if self.cursor + num > self.characters.len() {
193 return None;
194 }
195 Some(self.characters[self.cursor..(self.cursor + num)].to_vec())
196 }
197
198 pub fn is_done(&self) -> bool {
200 self.cursor >= self.characters.len()
201 }
202
203 pub fn next_char(&mut self) -> Option<&char> {
206 match self.characters.get(self.cursor) {
207 Some(character) => {
208 self.cursor += 1;
209 Some(character)
210 }
211 None => None,
212 }
213 }
214
215 pub fn take(&mut self, character: &char) -> bool {
218 match self.characters.get(self.cursor) {
219 Some(current) => {
220 if current == character {
221 self.cursor += 1;
222 true
223 } else {
224 false
225 }
226 }
227 None => false,
228 }
229 }
230
231 #[allow(dead_code)]
234 pub fn peek_skip_ws(&self) -> Option<char> {
235 let mut peek_cursor = self.cursor;
236 loop {
238 if peek_cursor >= self.characters.len() {
239 return None;
240 }
241 let char: char = self.characters[peek_cursor];
242 if !WS_CHARS.iter().any(|ch| *ch == char) {
243 return Some(self.characters[peek_cursor]);
244 }
245 peek_cursor += 1;
246 }
247 }
248
249 pub fn skip_ws(&mut self) {
251 loop {
252 if self.cursor >= self.characters.len() {
253 return;
254 }
255 let char: char = self.characters[self.cursor];
256
257 if !WS_CHARS.iter().any(|ch| *ch == char) {
258 return;
259 }
260 self.cursor += 1;
261 }
262 }
263
264 pub fn skip_empty_lines(&mut self) {
266 loop {
267 match self.peek() {
268 Some('\n') => {
269 self.next_char();
270 }
271 _ => return,
272 }
273 }
274 }
275
276 pub fn skip_empty_lines_and_ws(&mut self) {
277 loop {
278 let pos = self.get_pos();
279 self.skip_empty_lines();
280 self.skip_ws();
281 if self.get_pos().cursor == pos.cursor {
282 break;
283 }
284 }
285 }
286
287 pub fn match_str_forward(&mut self, str: &str) -> bool {
294 let chars = str.chars().collect::<Vec<char>>();
295 let sequence = chars.as_slice();
296
297 let mut peek_cursor = self.cursor;
298 let mut sequence_cursor = 0;
299 let seq_len = sequence.len();
300 let end_index = self.characters.len();
301
302 let matches_str = loop {
303 if sequence_cursor >= seq_len {
304 break true;
305 }
306 if peek_cursor >= end_index {
307 break false;
308 }
309 let current_char: char = self.characters[peek_cursor];
310 if current_char != sequence[sequence_cursor] {
311 break false;
312 }
313 sequence_cursor += 1;
314 peek_cursor += 1;
315 };
316 if matches_str {
317 self.cursor = peek_cursor;
318 }
319 matches_str
320 }
321
322 pub fn seek_return(&mut self, character: &char) -> Result<String, ScanError> {
323 let start: usize = self.cursor;
324 loop {
325 if self.cursor >= self.characters.len() {
326 return Err(ScanError::EndOfLine);
327 }
328 if self.characters[self.cursor] == *character {
329 let string = self.characters[start..self.cursor].iter().collect();
330 self.cursor += 1;
331 return Ok(string);
332 }
333 self.cursor += 1;
334 }
335 }
336
337 pub fn match_regex_forward(
344 &mut self,
345 user_regex_str: &str,
346 ) -> Result<Option<Vec<String>>, ScanError> {
347 if self.cursor >= self.characters.len() {
348 return Err(ScanError::EndOfLine);
349 }
350
351 let mut regex_str: String = user_regex_str.to_owned();
354 if !regex_str.starts_with('^') {
355 regex_str = format!("^{}", user_regex_str);
356 }
357 let regex = regex::bytes::Regex::new(®ex_str)?;
358
359 let string_tmp = self.characters[self.cursor..].iter().collect::<String>();
360 let bytes = string_tmp.as_bytes();
361 return match regex.captures(bytes) {
362 Some(comment_captures) => {
363 let mut str_captures: Vec<String> = Vec::new();
364
365 for (i, capture) in comment_captures.iter().enumerate() {
366 if i == 0 {
369 let matched_str = std::str::from_utf8(capture.unwrap().as_bytes()).unwrap();
370 let num_chars = matched_str.chars().count();
371 self.cursor += num_chars;
372 } else {
373 let capture_bytes: Vec<u8> = capture.unwrap().as_bytes().to_owned();
374 match String::from_utf8(capture_bytes) {
375 Ok(string) => {
376 str_captures.push(string);
377 }
378 Err(_) => return Err(ScanError::InvalidRegexCaptureConversion),
379 }
380 }
381 }
382 return Ok(Some(str_captures));
383 }
384 None => Ok(None),
385 };
386 }
387
388 pub fn get_line_and_advance(&mut self) -> Option<String> {
390 let mut peek_cursor = self.cursor;
391 let num_chars = self.characters.len();
392 if self.is_done() {
393 return None;
394 }
395
396 let line = loop {
397 if peek_cursor >= num_chars || self.characters[peek_cursor] == '\n' {
398 break self.characters[self.cursor..peek_cursor]
399 .iter()
400 .collect::<String>();
401 }
402 peek_cursor += 1;
403 };
404
405 if peek_cursor < num_chars {
407 peek_cursor += 1;
408 }
409
410 self.cursor = peek_cursor;
411
412 Some(line)
413 }
414
415 pub fn peek_line(&mut self) -> Option<String> {
416 if self.is_done() {
417 return None;
418 }
419
420 let mut peek_cursor = self.cursor;
421 let len = self.characters.len();
422
423 while peek_cursor < len && self.characters[peek_cursor] != '\n' {
424 peek_cursor += 1;
425 }
426
427 Some(
428 self.characters[self.cursor..peek_cursor]
429 .iter()
430 .collect::<String>(),
431 )
432 }
433
434 pub fn skip_to_next_line(&mut self) {
435 loop {
436 if self.is_done() {
437 return;
438 }
439 if self.characters[self.cursor] == '\n' {
440 self.cursor += 1;
441 return;
442 }
443 self.cursor += 1;
444 }
445 }
446
447 pub fn get_tokens(&self) -> Vec<String> {
448 self.characters
450 .iter()
451 .collect::<String>()
452 .split_whitespace()
453 .map(|s| s.to_string())
454 .collect()
455 }
456
457 fn get_prev_line_bounds(&self) -> Option<(usize, usize)> {
459 if self.cursor == 0 {
460 return None;
461 }
462 let mut line_end = self.cursor - 1;
463 loop {
464 if line_end == 0 {
466 return None;
467 }
468 if self.characters[line_end] == '\n' {
470 break;
471 }
472
473 line_end -= 1;
474 }
475
476 let mut line_start = line_end - 1;
477 loop {
478 if line_start == 0 {
479 break;
480 }
481 if self.characters[line_start] == '\n' {
482 line_start += 1;
484 break;
485 }
486 line_start -= 1;
487 }
488 if line_start > line_end {
489 line_start = line_end;
490 }
491
492 Some((line_start, line_end))
493 }
494
495 pub fn get_prev_line(&self) -> Option<String> {
497 let (line_start, line_end) = self.get_prev_line_bounds()?;
498 if line_start == line_end {
499 return Some("".to_string());
500 }
501 return Some(
502 self.characters[line_start..line_end]
503 .iter()
504 .collect::<String>(),
505 );
506 }
507
508 pub fn step_to_previous_line_start(&mut self) {
510 if let Some((line_start, _)) = self.get_prev_line_bounds() {
511 self.cursor = line_start;
512 }
513 }
514}
515
516#[allow(dead_code)]
518#[cfg(debug_assertions)]
519impl Scanner {
520 pub fn debug_string(&self) -> String {
521 let before: String = self.characters[..self.cursor].iter().collect();
522
523 let current: String = self
524 .characters
525 .get(self.cursor)
526 .map_or("".to_string(), |c| c.to_string());
527
528 let after: String = if self.cursor >= self.characters.len() - 1 {
529 String::new()
530 } else {
531 self.characters[self.cursor + 1..].iter().collect()
532 };
533 format!("{}[{}]{}", before, current, after)
534 }
535}
536
537#[cfg(test)]
538mod tests {
539
540 use super::*;
541
542 #[test]
543 pub fn seek_return() {
544 let string = "abc def ghi\n\n next line";
545 let mut scanner = Scanner::new(string);
546
547 match scanner.seek_return(&'\n') {
548 Ok(result) => {
549 assert_eq!(result, "abc def ghi");
550 assert_eq!(
551 scanner.cursor, 15,
552 "position should be right after new line"
553 );
554 }
555 err => panic!("invalid result: {:?}", err),
556 }
557
558 match scanner.seek_return(&'\n') {
559 Ok(result) => {
560 assert_eq!(result, "");
561 assert_eq!(scanner.cursor, 16);
562 }
563 err => panic!("invalid result: {:?}", err),
564 }
565 }
566
567 #[test]
568 pub fn seek_return_missing() {
569 let string = "abc def ghi";
570 let mut scanner = Scanner::new(string);
571
572 match scanner.seek_return(&'\n') {
573 Ok(_) => panic!("should not have found missing new line"),
574
575 Err(err) => {
576 assert_eq!(err, ScanError::EndOfLine);
577 }
578 }
579 }
580
581 #[test]
582 pub fn get_line_and_advance() {
583 let string = "First line\n Next Line \n";
584 let mut scanner = Scanner::new(string);
585
586 let line = scanner.get_line_and_advance();
587 assert_eq!(line, Some("First line".to_string()));
588 assert_eq!(scanner.cursor, 11);
589
590 let next = scanner.get_line_and_advance();
591 assert_eq!(next, Some(" Next Line ".to_string()));
592 assert!(scanner.is_done());
593 assert_eq!(scanner.cursor, string.len());
594
595 let next = scanner.get_line_and_advance();
597 assert!(next.is_none());
598 assert!(scanner.is_done());
599 assert!(scanner.cursor == string.len());
600 }
601
602 #[test]
603 pub fn skip_to_next_line() {
604 let string = "First line\nSecond Line\n\n";
605 let mut scanner = Scanner::new(string);
606 assert_eq!(scanner.cursor, 0);
607
608 scanner.skip_to_next_line();
609 assert_eq!(scanner.cursor, 11);
610 scanner.skip_to_next_line();
611 assert_eq!(scanner.cursor, 23);
612 scanner.skip_to_next_line();
613 assert_eq!(scanner.cursor, 24);
614 assert_eq!(scanner.cursor, string.len());
615 assert!(scanner.is_done());
616 }
617
618 #[test]
619 pub fn skip_empty_lines() {
620 let string = "0\n\n\n4";
621 let mut scanner = Scanner::new(string);
622
623 scanner.skip_empty_lines();
624 assert_eq!(scanner.cursor, 0);
625
626 scanner.next_char();
627 assert_eq!(scanner.cursor, 1);
628
629 scanner.skip_empty_lines();
630 assert_eq!(scanner.cursor, 4);
631 }
632
633 #[test]
634 pub fn skip_ws() {
635 let string = "0 \r \t \u{000C} 1";
636 let mut scanner = Scanner::new(string);
637
638 scanner.skip_ws();
640 assert_eq!(scanner.cursor, 0);
641
642 scanner.next_char();
643 scanner.skip_ws();
644 let last_char = scanner.peek().unwrap();
645 assert_eq!(*last_char, '1');
646 }
647
648 #[test]
649 pub fn match_str_forward() {
650 let string = "012 \nTest line";
651 let mut scanner = Scanner::new(string);
652
653 assert!(scanner.match_str_forward("012"));
655 assert_eq!(scanner.cursor, 3);
656
657 assert!(scanner.match_str_forward(" \n"));
658 assert_eq!(scanner.cursor, 7);
659
660 assert!(!scanner.match_str_forward("No match"));
661 assert_eq!(scanner.cursor, 7);
662
663 assert!(scanner.match_str_forward("Test line"));
664 assert!(scanner.is_done());
665
666 assert!(!scanner.match_str_forward("No match"));
667
668 assert!(scanner.match_str_forward(""));
669 }
670
671 #[test]
672 pub fn take() {
673 let string = "0 \n";
674 let mut scanner = Scanner::new(string);
675
676 assert_eq!(scanner.cursor, 0);
677 assert!(scanner.take(&'0'));
678 assert_eq!(scanner.cursor, 1);
679 assert!(scanner.take(&' '));
680 assert_eq!(scanner.cursor, 2);
681 assert!(scanner.take(&'\n'));
682 assert_eq!(scanner.cursor, 3);
683 assert!(scanner.is_done());
684 assert!(!scanner.take(&' '));
685 }
686
687 #[test]
688 pub fn peek() {
689 let string = "0 \n";
690 let mut scanner = Scanner::new(string);
691
692 assert_eq!(scanner.peek(), Some(&'0'));
693 assert_eq!(scanner.cursor, 0);
694
695 scanner.next_char();
696 assert_eq!(scanner.peek(), Some(&' '));
697 assert_eq!(scanner.cursor, 1);
698
699 scanner.next_char();
700 assert_eq!(scanner.peek(), Some(&'\n'));
701 assert_eq!(scanner.cursor, 2);
702
703 scanner.next_char();
704
705 assert_eq!(scanner.peek(), None);
707 assert!(scanner.is_done());
708 }
709
710 #[test]
711 pub fn match_regex_forward_only_at_start() {
712 let string = "### 000 123 456 ";
713 let mut scanner = Scanner::new(string);
714
715 let matches = scanner.match_regex_forward("123").unwrap();
718 assert_eq!(matches, None);
719 let mut scanner = Scanner::new(string);
720 let matches = scanner.match_regex_forward("^123").unwrap();
721 assert_eq!(matches, None);
722
723 let mut scanner = Scanner::new(string);
726 let matches = scanner.match_regex_forward("###").unwrap().unwrap();
727 assert_eq!(matches.len(), 0);
728
729 let mut scanner = Scanner::new(string);
730 let matches = scanner.match_regex_forward("^###").unwrap().unwrap();
731 assert_eq!(matches.len(), 0);
732
733 let mut scanner = Scanner::new(string);
735 let matches = scanner
736 .match_regex_forward("### (\\d\\d\\d)")
737 .unwrap()
738 .unwrap();
739 assert_eq!(matches, vec!["000"]);
740
741 let mut scanner = Scanner::new(string);
744 scanner.match_str_forward("### ");
745 let matches = scanner.match_regex_forward("###").unwrap();
746 assert_eq!(matches, None);
747 let matches = scanner.match_regex_forward("###").unwrap();
749 assert_eq!(matches, None);
750 let matches = scanner.match_regex_forward("(000)").unwrap().unwrap();
752 assert_eq!(matches, vec!["000"]);
753 }
754
755 #[test]
756 pub fn match_regex_forward_no_captures() {
757 let string = "000 123 456 | abc def ghi | \n\t\r\n end";
758 let mut scanner = Scanner::new(string);
759
760 let mut matches = scanner
762 .match_regex_forward("[0-9]{3} [0-9]{3} 456")
763 .unwrap()
764 .unwrap();
765 let empty: Vec<String> = Vec::new();
766 assert_eq!(matches, empty);
767
768 _ = scanner.match_regex_forward(" \\| ");
769
770 matches = scanner
771 .match_regex_forward("(abc) [a-z]{3} (ghi)")
772 .unwrap()
773 .unwrap();
774 assert_eq!(matches, vec!["abc", "ghi"]);
775
776 _ = scanner.match_regex_forward(" \\| ");
777
778 matches = scanner.match_regex_forward("\n(\t\r)\n ").unwrap().unwrap();
779
780 assert_eq!(matches, ["\t\r".to_string()]);
781 }
782
783 #[test]
784 pub fn get_prev_line_bounds() {
785 let string = "abc\ndef\n\n\n";
786 let mut scanner = Scanner::new(string);
787 assert_eq!(scanner.get_prev_line_bounds(), None);
788
789 scanner.skip_to_next_line();
790 assert_eq!(scanner.get_prev_line_bounds(), Some((0, 3)));
791 scanner.skip_to_next_line();
792 assert_eq!(scanner.get_prev_line_bounds(), Some((4, 7)));
793 }
794}