// layout/gv/parser/lexer.rs
//
// Lexer for the GraphViz DOT parser: the token type it produces.

/// A single lexical token produced by the lexer.
///
/// `PartialEq`/`Eq` are derived so tokens can be compared directly with `==`
/// and `assert_eq!` in addition to pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// End of input.
    EOF,
    /// A bare identifier, a numeral, or the contents of a quoted string.
    Identifier(String),
    /// The `graph` keyword.
    GraphKW,
    /// The `node` keyword.
    NodeKW,
    /// The `edge` keyword.
    EdgeKW,
    /// The `digraph` keyword.
    DigraphKW,
    /// The `strict` keyword.
    StrictKW,
    /// The `subgraph` keyword.
    SubgraphKW,
    /// `=`
    Equal,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `;`
    Semicolon,
    /// `->`
    ArrowRight,
    /// `--`
    ArrowLine,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// A lexing error; the payload is the lexer's `pos` when it was detected.
    Error(usize),
}
25
26#[derive(Debug)]
27pub struct Lexer {
28 input: Vec<char>,
29 pub pos: usize,
30 pub ch: char,
31}
32
33impl Lexer {
34 pub fn from_string(input: &str) -> Self {
35 let chars = input.chars().collect();
36 Lexer::new(chars)
37 }
38
39 pub fn new(input: Vec<char>) -> Self {
40 let mut l = Self {
41 input,
42 pos: 0,
43 ch: '\0',
44 };
45 l.read_char();
46 l
47 }
48
49 pub fn print_error(&self) {
50 let mut found_loc = false;
51 let mut since_last_line = 0;
52 let mut idx = 0;
53 for ch in self.input.iter() {
55 print!("{}", ch);
56 idx += 1;
57 if idx == self.pos {
58 found_loc = true;
59 }
60 if *ch == '\n' {
63 if found_loc {
64 println!();
65 for _ in 2..since_last_line {
69 print!(" ");
70 }
71 println!("^");
72 return;
73 }
74 since_last_line = 0;
75 }
76 since_last_line += 1;
77 }
78 }
79
80 pub fn has_next(&self) -> bool {
81 self.pos < self.input.len()
82 }
83
84 pub fn read_char(&mut self) {
85 if !self.has_next() {
86 self.ch = '\0';
87 } else {
88 self.ch = self.input[self.pos];
89 self.pos += 1;
90 }
91 }
92
93 pub fn skip_whitespace(&mut self) -> bool {
94 let mut changed = false;
95 while self.ch.is_ascii_whitespace() {
96 self.read_char();
97 changed = true;
98 }
99 changed
100 }
101
102 pub fn skip_comment(&mut self) -> bool {
103 let mut changed = false;
104 if self.ch != '/' {
105 return changed;
106 }
107 self.read_char();
108 changed = true;
109
110 if self.ch == '*' {
111 let mut prev = '\0';
112 while self.has_next() {
113 changed = true;
114 self.read_char();
115 if prev == '*' && self.ch == '/' {
116 self.read_char();
117 return changed;
118 }
119 prev = self.ch;
120 }
121 return changed;
122 }
123
124 if self.ch == '/' {
125 while self.has_next() {
126 changed = true;
127 self.read_char();
128 if self.ch.is_ascii_control() {
129 self.read_char();
130 return changed;
131 }
132 }
133 }
134 changed
135 }
136
137 pub fn read_identifier(&mut self) -> String {
138 let mut result = String::new();
139 while self.ch.is_ascii_alphanumeric() || self.ch == '_' {
140 result.push(self.ch);
141 self.read_char();
142 }
143 result
144 }
145
146 pub fn read_number(&mut self) -> String {
147 let mut result = String::new();
148 let mut period = false;
149 while self.ch.is_numeric() || self.ch == '.' {
150 if self.ch == '.' {
152 if !period {
153 period = true;
154 } else {
155 break;
156 }
157 }
158 result.push(self.ch);
159 self.read_char();
160 }
161 result
162 }
163
164 pub fn read_string(&mut self) -> Token {
165 let mut result = String::new();
166 self.read_char();
167 while self.ch != '"' {
168 if self.ch == '\\' {
170 self.read_char();
172 self.ch = match self.ch {
173 'n' => '\n',
174 'l' => '\n',
175 _ => self.ch,
176 }
177 } else if self.ch == '\0' {
178 return Token::Error(self.pos);
180 }
181 result.push(self.ch);
182 self.read_char();
183 }
184 Token::Identifier(result)
185 }
186
187 pub fn next_token(&mut self) -> Token {
188 let tok: Token;
189 while self.skip_comment() || self.skip_whitespace() {}
190 match self.ch {
191 '=' => {
192 tok = Token::Equal;
193 }
194 ';' => {
195 tok = Token::Semicolon;
196 }
197 ':' => {
198 tok = Token::Colon;
199 }
200 '[' => {
201 tok = Token::OpenBracket;
202 }
203 ']' => {
204 tok = Token::CloseBracket;
205 }
206 '{' => {
207 tok = Token::OpenBrace;
208 }
209 '}' => {
210 tok = Token::CloseBrace;
211 }
212 ',' => {
213 tok = Token::Comma;
214 }
215 '"' => {
216 tok = self.read_string();
217 }
218 '-' => {
219 self.read_char();
220 match self.ch {
221 '>' => {
222 tok = Token::ArrowRight;
223 }
224 '-' => {
225 tok = Token::ArrowLine;
226 }
227 _ => {
228 if self.ch.is_ascii_digit() {
229 let mut num = String::new();
230 let res = self.read_number();
231 num.push('-');
232 num.push_str(&res[..]);
233 tok = Token::Identifier(num);
234 } else {
235 tok = Token::Error(self.pos);
236 }
237 }
238 }
239 }
240 '\0' => {
241 tok = Token::EOF;
242 }
243 _ => {
244 if self.ch.is_ascii_alphabetic() {
245 let name = self.read_identifier();
246 match name.as_str() {
247 "graph" => {
248 return Token::GraphKW;
249 }
250 "node" => {
251 return Token::NodeKW;
252 }
253 "edge" => {
254 return Token::EdgeKW;
255 }
256 "digraph" => {
257 return Token::DigraphKW;
258 }
259 "strict" => {
260 return Token::StrictKW;
261 }
262 "subgraph" => {
263 return Token::SubgraphKW;
264 }
265 _ => {
266 return Token::Identifier(name);
267 }
268 }
269 }
270 if self.ch.is_ascii_digit() {
271 let num = self.read_number();
272 return Token::Identifier(num);
273 }
274
275 return Token::Error(self.pos);
276 }
277 }
278 self.read_char();
279 tok
280 }
281}