basic/lang/
lex.rs

1use super::{token::*, LineNumber, MaxValue};
2use std::collections::VecDeque;
3
/// Tokenizes one line of BASIC source.
///
/// Thin public entry point that forwards to `BasicLexer::lex`. The result
/// pairs the line number taken from the start of the line (if one was
/// recognised there) with the tokens lexed from the rest of the line.
pub fn lex(source_line: &str) -> (LineNumber, Vec<Token>) {
    BasicLexer::lex(source_line)
}
7
/// Returns `true` for the two characters the lexer treats as blanks:
/// the space and the horizontal tab.
fn is_basic_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
11
/// Returns `true` only for the ASCII decimal digits `0` through `9`.
fn is_basic_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
15
/// Returns `true` only for the ASCII letters `A`-`Z` and `a`-`z`.
fn is_basic_alphabetic(c: char) -> bool {
    matches!(c, 'A'..='Z' | 'a'..='z')
}
19
/// Character-level lexer state for a single source line.
///
/// Tokens are produced through the `Iterator` implementation.
struct BasicLexer {
    /// Characters of the line that have not been consumed yet.
    chars: VecDeque<char>,
    /// Tokens already produced but not yet handed out; `next` drains this
    /// queue before it looks at `chars` again.
    pending: VecDeque<Token>,
    /// Set once a `Word::Rem1` or `Word::Rem2` token has been produced; from
    /// then on everything left in `chars` is emitted as one `Token::Unknown`.
    remark: bool,
}
25
26impl<'a> Iterator for BasicLexer {
27    type Item = Token;
28
29    fn next(&mut self) -> Option<Self::Item> {
30        if let Some(t) = self.pending.pop_front() {
31            return Some(t);
32        }
33        let pk = self.chars.front()?;
34        if self.remark {
35            return Some(Token::Unknown(self.chars.drain(..).collect::<String>()));
36        }
37        if is_basic_whitespace(*pk) {
38            return self.whitespace();
39        }
40        if is_basic_digit(*pk) || *pk == '.' {
41            return self.number();
42        }
43        if is_basic_alphabetic(*pk) {
44            let token = self.alphabetic();
45            if matches!(token, Some(Token::Word(Word::Rem1))) {
46                self.remark = true;
47            }
48            return token;
49        }
50        if *pk == '"' {
51            return self.string();
52        }
53        if *pk == '&' {
54            return self.radix();
55        }
56        let minutia = self.minutia();
57        if matches!(minutia, Some(Token::Word(Word::Rem2))) {
58            self.remark = true;
59        }
60        minutia
61    }
62}
63
64impl BasicLexer {
65    fn lex(mut source_line: &str) -> (LineNumber, Vec<Token>) {
66        let mut line_number = None;
67        let mut line_str_pos: usize = 0;
68        let mut seen_digit = false;
69        while let Some(s) = source_line.get(line_str_pos..) {
70            if let Some(ch) = s.chars().next() {
71                if seen_digit && is_basic_whitespace(ch) {
72                    break;
73                }
74                if is_basic_digit(ch) {
75                    seen_digit = true;
76                } else if !is_basic_whitespace(ch) {
77                    break;
78                }
79                line_str_pos += 1;
80            } else {
81                break;
82            }
83        }
84        if let Ok(num) = source_line[0..line_str_pos].trim_start().parse::<u16>() {
85            if num <= LineNumber::max_value() {
86                line_number = Some(num);
87                if let Some(' ') = source_line[line_str_pos..].chars().next() {
88                    line_str_pos += 1;
89                }
90                source_line = &source_line[line_str_pos..];
91            }
92        }
93        let mut tokens = BasicLexer {
94            chars: source_line.chars().collect(),
95            pending: VecDeque::default(),
96            remark: false,
97        }
98        .collect();
99        BasicLexer::trim_end(&mut tokens);
100        BasicLexer::collapse_triples(&mut tokens);
101        BasicLexer::collapse_doubles(&mut tokens);
102        BasicLexer::separate_words(&mut tokens);
103        (line_number, tokens)
104    }
105
106    fn collapse_triples(tokens: &mut Vec<Token>) {
107        let mut locs: Vec<(usize, Token)> = vec![];
108        for (index, ttt) in tokens.windows(3).enumerate() {
109            if let Token::Operator(Operator::Less) = &ttt[0] {
110                if let Token::Whitespace(_) = &ttt[1] {
111                    if let Token::Operator(Operator::Greater) = &ttt[2] {
112                        locs.push((index, Token::Operator(Operator::NotEqual)));
113                    }
114                    if let Token::Operator(Operator::Equal) = &ttt[2] {
115                        locs.push((index, Token::Operator(Operator::LessEqual)));
116                    }
117                }
118            }
119            if let Token::Operator(Operator::Equal) = &ttt[0] {
120                if let Token::Whitespace(_) = &ttt[1] {
121                    if let Token::Operator(Operator::Greater) = &ttt[2] {
122                        locs.push((index, Token::Operator(Operator::GreaterEqual)));
123                    }
124                    if let Token::Operator(Operator::Less) = &ttt[2] {
125                        locs.push((index, Token::Operator(Operator::LessEqual)));
126                    }
127                }
128            }
129            if let Token::Operator(Operator::Greater) = &ttt[0] {
130                if let Token::Whitespace(_) = &ttt[1] {
131                    if let Token::Operator(Operator::Less) = &ttt[2] {
132                        locs.push((index, Token::Operator(Operator::NotEqual)));
133                    }
134                    if let Token::Operator(Operator::Equal) = &ttt[2] {
135                        locs.push((index, Token::Operator(Operator::GreaterEqual)));
136                    }
137                }
138            }
139            if let Token::Ident(Ident::Plain(go)) = &ttt[0] {
140                if go == "GO" {
141                    if let Token::Whitespace(_) = ttt[1] {
142                        if let Token::Word(Word::To) = ttt[2] {
143                            locs.push((index, Token::Word(Word::Goto)));
144                        }
145                        if let Token::Ident(Ident::Plain(sub)) = &ttt[2] {
146                            if sub == "SUB" {
147                                locs.push((index, Token::Word(Word::Gosub)));
148                            }
149                        }
150                    }
151                }
152            }
153        }
154        while let Some((index, token)) = locs.pop() {
155            tokens.splice(index..index + 3, Some(token));
156        }
157    }
158
159    fn collapse_doubles(tokens: &mut Vec<Token>) {
160        let mut locs: Vec<(usize, Token)> = vec![];
161        let mut tokens_iter = tokens.windows(2).enumerate();
162        while let Some((index, tt)) = tokens_iter.next() {
163            if let Token::Operator(Operator::Equal) = tt[0] {
164                if let Token::Operator(Operator::Greater) = tt[1] {
165                    locs.push((index, Token::Operator(Operator::GreaterEqual)));
166                    tokens_iter.next();
167                }
168                if let Token::Operator(Operator::Less) = tt[1] {
169                    locs.push((index, Token::Operator(Operator::LessEqual)));
170                    tokens_iter.next();
171                }
172            }
173            if let Token::Operator(Operator::Equal) = tt[1] {
174                if let Token::Operator(Operator::Greater) = tt[0] {
175                    locs.push((index, Token::Operator(Operator::GreaterEqual)));
176                    tokens_iter.next();
177                }
178                if let Token::Operator(Operator::Less) = tt[0] {
179                    locs.push((index, Token::Operator(Operator::LessEqual)));
180                    tokens_iter.next();
181                }
182            }
183            if let Token::Operator(Operator::Less) = tt[0] {
184                if let Token::Operator(Operator::Greater) = tt[1] {
185                    locs.push((index, Token::Operator(Operator::NotEqual)));
186                    tokens_iter.next();
187                }
188            }
189        }
190        while let Some((index, token)) = locs.pop() {
191            tokens.splice(index..index + 2, Some(token));
192        }
193    }
194
195    fn separate_words(tokens: &mut Vec<Token>) {
196        let mut locs: Vec<usize> = vec![];
197        for (index, tt) in tokens.windows(2).enumerate() {
198            if tt.iter().all(Token::is_word) {
199                locs.push(index);
200            }
201        }
202        while let Some(index) = locs.pop() {
203            tokens.insert(index + 1, Token::Whitespace(1));
204        }
205    }
206
207    fn trim_end(tokens: &mut Vec<Token>) {
208        if let Some(Token::Whitespace(_)) = tokens.last() {
209            tokens.pop();
210        }
211        if let Some(Token::Unknown(_)) = tokens.last() {
212            if let Some(Token::Unknown(s)) = tokens.pop() {
213                tokens.push(Token::Unknown(s.trim_end().into()));
214            }
215        }
216    }
217
218    fn whitespace(&mut self) -> Option<Token> {
219        let mut len = 0;
220        loop {
221            self.chars.pop_front();
222            len += 1;
223            if let Some(pk) = self.chars.front() {
224                if is_basic_whitespace(*pk) {
225                    continue;
226                }
227            }
228            return Some(Token::Whitespace(len));
229        }
230    }
231
232    fn number(&mut self) -> Option<Token> {
233        let mut s = String::new();
234        let mut digits = 0;
235        let mut decimal = false;
236        let mut exp = false;
237        while let Some(mut ch) = self.chars.pop_front() {
238            if ch == 'e' {
239                ch = 'E'
240            }
241            if ch == 'd' {
242                ch = 'D'
243            }
244            s.push(ch);
245            if !exp && is_basic_digit(ch) {
246                digits += 1;
247            }
248            if ch == '.' {
249                decimal = true
250            }
251            if ch == 'D' {
252                digits += 8;
253            }
254            if ch == '!' {
255                return Some(Token::Literal(Literal::Single(s)));
256            }
257            if ch == '#' {
258                return Some(Token::Literal(Literal::Double(s)));
259            }
260            if ch == '%' {
261                return Some(Token::Literal(Literal::Integer(s)));
262            }
263            if let Some(pk) = self.chars.front().cloned() {
264                if ch == 'E' || ch == 'D' {
265                    exp = true;
266                    if pk == '+' || pk == '-' {
267                        continue;
268                    }
269                    if !is_basic_digit(pk) {
270                        exp = false;
271                        s.pop();
272                        self.chars.push_front(ch);
273                    }
274                }
275                if is_basic_digit(pk) {
276                    continue;
277                }
278                if !exp && !decimal && pk == '.' {
279                    continue;
280                }
281                if !exp && pk == 'E' || pk == 'e' || pk == 'D' || pk == 'd' {
282                    continue;
283                }
284                if pk == '!' || pk == '#' || pk == '%' {
285                    continue;
286                }
287            }
288            break;
289        }
290        if digits > 7 {
291            return Some(Token::Literal(Literal::Double(s)));
292        }
293        if !exp && !decimal && s.parse::<i16>().is_ok() {
294            return Some(Token::Literal(Literal::Integer(s)));
295        }
296        Some(Token::Literal(Literal::Single(s)))
297    }
298
299    fn string(&mut self) -> Option<Token> {
300        let mut s = String::new();
301        self.chars.pop_front();
302        while let Some(ch) = self.chars.pop_front() {
303            if ch == '"' {
304                break;
305            }
306            s.push(ch);
307        }
308        Some(Token::Literal(Literal::String(s)))
309    }
310
    /// Scans a run that starts with a letter and returns the first token it
    /// yields; any further tokens found in the same run are left queued in
    /// `self.pending` for later calls to `next`.
    ///
    /// Characters are upper-cased as they are collected. A type suffix
    /// (`$`, `!`, `#`, `%`) ends the run and produces the matching typed
    /// `Ident`, with the suffix included in the name.
    fn alphabetic(&mut self) -> Option<Token> {
        let mut s = String::new();
        // Becomes true once a digit has been collected into `s`.
        let mut digit = false;
        while let Some(ch) = self.chars.pop_front() {
            let ch = ch.to_ascii_uppercase();
            s.push(ch);
            if is_basic_digit(ch) {
                digit = true;
            }
            // A type suffix terminates the identifier immediately.
            if ch == '$' {
                self.pending.push_back(Token::Ident(Ident::String(s)));
                break;
            } else if ch == '!' {
                self.pending.push_back(Token::Ident(Ident::Single(s)));
                break;
            } else if ch == '#' {
                self.pending.push_back(Token::Ident(Ident::Double(s)));
                break;
            } else if ch == '%' {
                self.pending.push_back(Token::Ident(Ident::Integer(s)));
                break;
            }
            if let Some(pk) = self.chars.front().cloned() {
                if is_basic_alphabetic(pk) {
                    // A letter after a digit starts a new run: emit what has
                    // been collected as a plain identifier and stop.
                    if digit {
                        self.pending.push_back(Token::Ident(Ident::Plain(s)));
                        break;
                    }
                    continue;
                }
                if is_basic_digit(pk) || pk == '$' || pk == '!' || pk == '#' || pk == '%' {
                    // NOTE(review): `Token::scan_alphabetic` is defined
                    // elsewhere; it presumably queues any keywords found in
                    // `s` onto `pending` and returns the unmatched remainder
                    // — confirm against the token module.
                    s = Token::scan_alphabetic(&mut self.pending, &s);
                    if s.is_empty() {
                        // Nothing left over, so the digit/suffix that follows
                        // is not part of this run.
                        break;
                    }
                    continue;
                }
            }
            // End of input or a non-identifier character: whatever
            // `scan_alphabetic` returns becomes a plain identifier.
            s = Token::scan_alphabetic(&mut self.pending, &s);
            if !s.is_empty() {
                self.pending.push_back(Token::Ident(Ident::Plain(s)));
            }
            break;
        }
        self.pending.pop_front()
    }
357
358    fn radix(&mut self) -> Option<Token> {
359        self.chars.pop_front();
360        let is_hex = if matches!(self.chars.front(), Some('H') | Some('h')) {
361            self.chars.pop_front();
362            true
363        } else {
364            false
365        };
366        let mut s = String::new();
367        while let Some(ch) = self.chars.pop_front() {
368            let ch = ch.to_ascii_uppercase();
369            if ('0'..='7').contains(&ch)
370                || (is_hex && (('8'..='9').contains(&ch) || ('A'..='F').contains(&ch)))
371            {
372                s.push(ch)
373            } else {
374                break;
375            }
376        }
377        if is_hex {
378            Some(Token::Literal(Literal::Hex(s)))
379        } else {
380            Some(Token::Literal(Literal::Octal(s)))
381        }
382    }
383
384    fn minutia(&mut self) -> Option<Token> {
385        let mut s = String::new();
386        while let Some(ch) = self.chars.pop_front() {
387            s.push(ch);
388            if let Some(token) = Token::match_minutia(&s) {
389                return Some(token);
390            }
391            if let Some(pk) = self.chars.front() {
392                if is_basic_alphabetic(*pk) {
393                    break;
394                }
395                if is_basic_digit(*pk) {
396                    break;
397                }
398                if is_basic_whitespace(*pk) {
399                    break;
400                }
401                continue;
402            }
403            break;
404        }
405        Some(Token::Unknown(s))
406    }
407}