ziyy_core/parser/word_parser/
scanner.rs

1use super::token::Token;
2use crate::common::Span;
3use crate::scanner::{GenericScanner, Source};
4use crate::splitter::fragment::Fragment;
5
6pub struct Scanner {
7    source: Vec<char>,
8    tokens: Vec<Token>,
9    start: usize,
10    current: usize,
11    span: Span,
12}
13
14impl Scanner {
15    pub fn new(mut source: Fragment) -> Self {
16        source.span.tie_start();
17
18        Self {
19            source: source.lexeme.chars().collect(),
20            tokens: vec![],
21            start: 0,
22            current: 0,
23            span: source.span,
24        }
25    }
26
27    fn escape(&mut self) {
28        if self.is_at_end() {
29            return;
30        }
31        let c = self.advance();
32
33        let mut scan_until = |limit: u8, tester: fn(c: char) -> bool| {
34            let mut i = 0;
35            while i < limit && tester(self.peek()) {
36                self.advance();
37                i += 1;
38            }
39        };
40
41        fn is_hexdigit(c: char) -> bool {
42            c.is_ascii_hexdigit()
43        }
44
45        fn is_octdigit(c: char) -> bool {
46            matches!(c, '0'..'8')
47        }
48
49        match c {
50            'a' => self.add_token('\x07'),
51            'b' => self.add_token('\x08'),
52            'e' => self.add_token('\x1b'),
53            'f' => self.add_token('\x0c'),
54            'n' => self.add_token('\x0a'),
55            'r' => self.add_token('\x0d'),
56            't' => self.add_token('\t'),
57            'v' => self.add_token('\x0b'),
58            '\\' => self.add_token('\\'),
59            '<' => self.add_token('<'),
60            '>' => self.add_token('>'),
61            '0' => {
62                scan_until(3, is_octdigit);
63                let num = u32::from_str_radix(&self.text()[2..], 8).unwrap();
64                self.add_token(char::from_u32(num).unwrap_or(char::REPLACEMENT_CHARACTER));
65            }
66            'x' => {
67                scan_until(2, is_hexdigit);
68                let num = u32::from_str_radix(&self.text()[2..], 16).unwrap();
69                self.add_token(char::from_u32(num).unwrap_or(char::REPLACEMENT_CHARACTER));
70            }
71            'u' => {
72                scan_until(4, is_hexdigit);
73                let num = u32::from_str_radix(&self.text()[2..], 16).unwrap();
74                self.add_token(char::from_u32(num).unwrap_or(char::REPLACEMENT_CHARACTER));
75            }
76            'U' => {
77                scan_until(8, is_hexdigit);
78                let num = u32::from_str_radix(&self.text()[2..], 16).unwrap();
79                self.add_token(char::from_u32(num).unwrap_or(char::REPLACEMENT_CHARACTER));
80            }
81            _ => {
82                self.add_token('\\');
83                self.add_token(c);
84            }
85        };
86    }
87
88    fn text(&self) -> String {
89        self.source[self.start..self.current].to_string()
90    }
91
92    fn add_token(&mut self, literal: char) {
93        self.tokens.push(Token::new(literal, self.span));
94        self.span.tie_end();
95    }
96}
97
98impl_generic_scanner!(|s: &mut Scanner| {
99    let c = s.advance();
100    match c {
101        '\\' => s.escape(),
102        '\x1b' => {
103            s.add_token('\x1b');
104        }
105        _ => s.add_token(c),
106    }
107});
108
/// Module-local conversion of a character sequence into an owned `String`.
///
/// Defined here because `[char]` does not get `to_string` from the standard
/// library's `ToString` (it has no `Display` impl).
trait ToString {
    /// Builds a `String` containing the receiver's characters in order.
    fn to_string(&self) -> String;
}

impl ToString for [char] {
    /// Concatenates every `char` of the slice, in order, into a new `String`.
    fn to_string(&self) -> String {
        self.iter().collect()
    }
}
123}