layout/gv/parser/
lexer.rs

//! The lexer implementation for the Graphviz (DOT) file format.
2
/// A lexical token produced by the lexer in this file.
///
/// `PartialEq`/`Eq` are derived so tokens can be compared directly (for
/// example with `==` or `assert_eq!`) instead of only via pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// End of input.
    EOF,
    /// A bare identifier, a number, or the contents of a quoted string.
    Identifier(String),
    /// The `graph` keyword.
    GraphKW,
    /// The `node` keyword.
    NodeKW,
    /// The `edge` keyword.
    EdgeKW,
    /// The `digraph` keyword.
    DigraphKW,
    /// The `strict` keyword.
    StrictKW,
    /// The `subgraph` keyword.
    SubgraphKW,
    /// `=`
    Equal,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `;`
    Semicolon,
    /// `->`
    ArrowRight,
    /// `--`
    ArrowLine,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// A lexing error; the payload is the lexer's `pos` when the error was
    /// detected.
    Error(usize),
}
25
/// Character-level lexer state.
///
/// `Clone` is derived so a caller can snapshot the lexer (for lookahead or
/// backtracking) and resume from the copy.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The complete input, one element per `char`.
    input: Vec<char>,
    /// Index of the next character to be read, i.e. one past `ch` while
    /// input remains.
    pub pos: usize,
    /// The character currently under inspection; `'\0'` once the input is
    /// exhausted.
    pub ch: char,
}
32
33impl Lexer {
34    pub fn from_string(input: &str) -> Self {
35        let chars = input.chars().collect();
36        Lexer::new(chars)
37    }
38
39    pub fn new(input: Vec<char>) -> Self {
40        let mut l = Self {
41            input,
42            pos: 0,
43            ch: '\0',
44        };
45        l.read_char();
46        l
47    }
48
49    pub fn print_error(&self) {
50        let mut found_loc = false;
51        let mut since_last_line = 0;
52        let mut idx = 0;
53        // Print every char in the file.
54        for ch in self.input.iter() {
55            print!("{}", ch);
56            idx += 1;
57            if idx == self.pos {
58                found_loc = true;
59            }
60            // Go until the end of the line, but keep track how many spaces we
61            // need to print.
62            if *ch == '\n' {
63                if found_loc {
64                    println!();
65                    // Subtract 1, because 'pos' points one char after the error
66                    // and another one because we print a '^' marker instead of
67                    // the last space.
68                    for _ in 2..since_last_line {
69                        print!(" ");
70                    }
71                    println!("^");
72                    return;
73                }
74                since_last_line = 0;
75            }
76            since_last_line += 1;
77        }
78    }
79
80    pub fn has_next(&self) -> bool {
81        self.pos < self.input.len()
82    }
83
84    pub fn read_char(&mut self) {
85        if !self.has_next() {
86            self.ch = '\0';
87        } else {
88            self.ch = self.input[self.pos];
89            self.pos += 1;
90        }
91    }
92
93    pub fn skip_whitespace(&mut self) -> bool {
94        let mut changed = false;
95        while self.ch.is_ascii_whitespace() {
96            self.read_char();
97            changed = true;
98        }
99        changed
100    }
101
102    pub fn skip_comment(&mut self) -> bool {
103        let mut changed = false;
104        if self.ch != '/' {
105            return changed;
106        }
107        self.read_char();
108        changed = true;
109
110        if self.ch == '*' {
111            let mut prev = '\0';
112            while self.has_next() {
113                changed = true;
114                self.read_char();
115                if prev == '*' && self.ch == '/' {
116                    self.read_char();
117                    return changed;
118                }
119                prev = self.ch;
120            }
121            return changed;
122        }
123
124        if self.ch == '/' {
125            while self.has_next() {
126                changed = true;
127                self.read_char();
128                if self.ch.is_ascii_control() {
129                    self.read_char();
130                    return changed;
131                }
132            }
133        }
134        changed
135    }
136
137    pub fn read_identifier(&mut self) -> String {
138        let mut result = String::new();
139        while self.ch.is_ascii_alphanumeric() || self.ch == '_' {
140            result.push(self.ch);
141            self.read_char();
142        }
143        result
144    }
145
146    pub fn read_number(&mut self) -> String {
147        let mut result = String::new();
148        let mut period = false;
149        while self.ch.is_numeric() || self.ch == '.' {
150            // Only allow one period in each number.
151            if self.ch == '.' {
152                if !period {
153                    period = true;
154                } else {
155                    break;
156                }
157            }
158            result.push(self.ch);
159            self.read_char();
160        }
161        result
162    }
163
164    pub fn read_string(&mut self) -> Token {
165        let mut result = String::new();
166        self.read_char();
167        while self.ch != '"' {
168            // Handle escaping
169            if self.ch == '\\' {
170                // Consume the escape character.
171                self.read_char();
172                self.ch = match self.ch {
173                    'n' => '\n',
174                    'l' => '\n',
175                    _ => self.ch,
176                }
177            } else if self.ch == '\0' {
178                // Reached EOF without completing the string
179                return Token::Error(self.pos);
180            }
181            result.push(self.ch);
182            self.read_char();
183        }
184        Token::Identifier(result)
185    }
186
187    pub fn next_token(&mut self) -> Token {
188        let tok: Token;
189        while self.skip_comment() || self.skip_whitespace() {}
190        match self.ch {
191            '=' => {
192                tok = Token::Equal;
193            }
194            ';' => {
195                tok = Token::Semicolon;
196            }
197            ':' => {
198                tok = Token::Colon;
199            }
200            '[' => {
201                tok = Token::OpenBracket;
202            }
203            ']' => {
204                tok = Token::CloseBracket;
205            }
206            '{' => {
207                tok = Token::OpenBrace;
208            }
209            '}' => {
210                tok = Token::CloseBrace;
211            }
212            ',' => {
213                tok = Token::Comma;
214            }
215            '"' => {
216                tok = self.read_string();
217            }
218            '-' => {
219                self.read_char();
220                match self.ch {
221                    '>' => {
222                        tok = Token::ArrowRight;
223                    }
224                    '-' => {
225                        tok = Token::ArrowLine;
226                    }
227                    _ => {
228                        if self.ch.is_ascii_digit() {
229                            let mut num = String::new();
230                            let res = self.read_number();
231                            num.push('-');
232                            num.push_str(&res[..]);
233                            tok = Token::Identifier(num);
234                        } else {
235                            tok = Token::Error(self.pos);
236                        }
237                    }
238                }
239            }
240            '\0' => {
241                tok = Token::EOF;
242            }
243            _ => {
244                if self.ch.is_ascii_alphabetic() {
245                    let name = self.read_identifier();
246                    match name.as_str() {
247                        "graph" => {
248                            return Token::GraphKW;
249                        }
250                        "node" => {
251                            return Token::NodeKW;
252                        }
253                        "edge" => {
254                            return Token::EdgeKW;
255                        }
256                        "digraph" => {
257                            return Token::DigraphKW;
258                        }
259                        "strict" => {
260                            return Token::StrictKW;
261                        }
262                        "subgraph" => {
263                            return Token::SubgraphKW;
264                        }
265                        _ => {
266                            return Token::Identifier(name);
267                        }
268                    }
269                }
270                if self.ch.is_ascii_digit() {
271                    let num = self.read_number();
272                    return Token::Identifier(num);
273                }
274
275                return Token::Error(self.pos);
276            }
277        }
278        self.read_char();
279        tok
280    }
281}