ziyy_core/parser/word_parser/
scanner.rs1use super::token::Token;
2use crate::common::Span;
3use crate::scanner::{GenericScanner, Source};
4use crate::splitter::fragment::Fragment;
5
6pub struct Scanner {
7 source: Vec<char>,
8 tokens: Vec<Token>,
9 start: usize,
10 current: usize,
11 span: Span,
12}
13
14impl Scanner {
15 pub fn new(mut source: Fragment) -> Self {
16 source.span.tie_start();
17
18 Self {
19 source: source.lexeme.chars().collect(),
20 tokens: vec![],
21 start: 0,
22 current: 0,
23 span: source.span,
24 }
25 }
26
27 fn escape(&mut self) {
28 if self.is_at_end() {
29 return;
30 }
31 let c = self.advance();
32
33 let mut scan_until = |limit: u8, tester: fn(c: char) -> bool| {
34 let mut i = 0;
35 while i < limit && tester(self.peek()) {
36 self.advance();
37 i += 1;
38 }
39 };
40
41 fn is_hexdigit(c: char) -> bool {
42 c.is_ascii_hexdigit()
43 }
44
45 fn is_octdigit(c: char) -> bool {
46 matches!(c, '0'..'8')
47 }
48
49 match c {
50 'a' => self.add_token('\x07'),
51 'b' => self.add_token('\x08'),
52 'e' => self.add_token('\x1b'),
53 'f' => self.add_token('\x0c'),
54 'n' => self.add_token('\x0a'),
55 'r' => self.add_token('\x0d'),
56 't' => self.add_token('\t'),
57 'v' => self.add_token('\x0b'),
58 '\\' => self.add_token('\\'),
59 '<' => self.add_token('<'),
60 '>' => self.add_token('>'),
61 '0' => {
62 scan_until(3, is_octdigit);
63 let num = u32::from_str_radix(&self.text()[2..], 8).unwrap();
64 self.add_token(char::from_u32(num).unwrap_or(char::REPLACEMENT_CHARACTER));
65 }
66 'x' => {
67 scan_until(2, is_hexdigit);
68 let num = u32::from_str_radix(&self.text()[2..], 16).unwrap();
69 self.add_token(char::from_u32(num).unwrap_or(char::REPLACEMENT_CHARACTER));
70 }
71 'u' => {
72 scan_until(4, is_hexdigit);
73 let num = u32::from_str_radix(&self.text()[2..], 16).unwrap();
74 self.add_token(char::from_u32(num).unwrap_or(char::REPLACEMENT_CHARACTER));
75 }
76 'U' => {
77 scan_until(8, is_hexdigit);
78 let num = u32::from_str_radix(&self.text()[2..], 16).unwrap();
79 self.add_token(char::from_u32(num).unwrap_or(char::REPLACEMENT_CHARACTER));
80 }
81 _ => {
82 self.add_token('\\');
83 self.add_token(c);
84 }
85 };
86 }
87
88 fn text(&self) -> String {
89 self.source[self.start..self.current].to_string()
90 }
91
92 fn add_token(&mut self, literal: char) {
93 self.tokens.push(Token::new(literal, self.span));
94 self.span.tie_end();
95 }
96}
97
98impl_generic_scanner!(|s: &mut Scanner| {
99 let c = s.advance();
100 match c {
101 '\\' => s.escape(),
102 '\x1b' => {
103 s.add_token('\x1b');
104 }
105 _ => s.add_token(c),
106 }
107});
108
/// Module-local conversion trait that lets a slice of `char`s be turned into
/// an owned `String` with `.to_string()`.
trait ToString {
    /// Returns the receiver rendered as an owned `String`.
    fn to_string(&self) -> String;
}

impl ToString for [char] {
    /// Concatenates the characters of the slice, in order, into a `String`.
    fn to_string(&self) -> String {
        self.iter().collect()
    }
}
123}