1use super::{token::*, LineNumber, MaxValue};
2use std::collections::VecDeque;
3
4pub fn lex(source_line: &str) -> (LineNumber, Vec<Token>) {
5 BasicLexer::lex(source_line)
6}
7
/// Returns `true` for the two characters the lexer treats as blanks:
/// the space and the horizontal tab.
fn is_basic_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
11
/// Returns `true` when `c` is one of the ASCII decimal digits `0` through `9`.
fn is_basic_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
15
/// Returns `true` when `c` is an ASCII letter, upper or lower case.
fn is_basic_alphabetic(c: char) -> bool {
    matches!(c, 'A'..='Z' | 'a'..='z')
}
19
/// Character-level lexer state for a single source line.
///
/// The `Iterator` implementation yields one `Token` per call, consuming
/// characters from the front of `chars`.
struct BasicLexer {
    /// Characters of the line that have not been consumed yet.
    chars: VecDeque<char>,
    /// Tokens already produced by a scan but not yet returned from `next`.
    pending: VecDeque<Token>,
    /// Set once a remark keyword token has been produced by `next`; the rest
    /// of the line is then emitted as a single `Token::Unknown`.
    remark: bool,
}
25
26impl<'a> Iterator for BasicLexer {
27 type Item = Token;
28
29 fn next(&mut self) -> Option<Self::Item> {
30 if let Some(t) = self.pending.pop_front() {
31 return Some(t);
32 }
33 let pk = self.chars.front()?;
34 if self.remark {
35 return Some(Token::Unknown(self.chars.drain(..).collect::<String>()));
36 }
37 if is_basic_whitespace(*pk) {
38 return self.whitespace();
39 }
40 if is_basic_digit(*pk) || *pk == '.' {
41 return self.number();
42 }
43 if is_basic_alphabetic(*pk) {
44 let token = self.alphabetic();
45 if matches!(token, Some(Token::Word(Word::Rem1))) {
46 self.remark = true;
47 }
48 return token;
49 }
50 if *pk == '"' {
51 return self.string();
52 }
53 if *pk == '&' {
54 return self.radix();
55 }
56 let minutia = self.minutia();
57 if matches!(minutia, Some(Token::Word(Word::Rem2))) {
58 self.remark = true;
59 }
60 minutia
61 }
62}
63
64impl BasicLexer {
65 fn lex(mut source_line: &str) -> (LineNumber, Vec<Token>) {
66 let mut line_number = None;
67 let mut line_str_pos: usize = 0;
68 let mut seen_digit = false;
69 while let Some(s) = source_line.get(line_str_pos..) {
70 if let Some(ch) = s.chars().next() {
71 if seen_digit && is_basic_whitespace(ch) {
72 break;
73 }
74 if is_basic_digit(ch) {
75 seen_digit = true;
76 } else if !is_basic_whitespace(ch) {
77 break;
78 }
79 line_str_pos += 1;
80 } else {
81 break;
82 }
83 }
84 if let Ok(num) = source_line[0..line_str_pos].trim_start().parse::<u16>() {
85 if num <= LineNumber::max_value() {
86 line_number = Some(num);
87 if let Some(' ') = source_line[line_str_pos..].chars().next() {
88 line_str_pos += 1;
89 }
90 source_line = &source_line[line_str_pos..];
91 }
92 }
93 let mut tokens = BasicLexer {
94 chars: source_line.chars().collect(),
95 pending: VecDeque::default(),
96 remark: false,
97 }
98 .collect();
99 BasicLexer::trim_end(&mut tokens);
100 BasicLexer::collapse_triples(&mut tokens);
101 BasicLexer::collapse_doubles(&mut tokens);
102 BasicLexer::separate_words(&mut tokens);
103 (line_number, tokens)
104 }
105
106 fn collapse_triples(tokens: &mut Vec<Token>) {
107 let mut locs: Vec<(usize, Token)> = vec![];
108 for (index, ttt) in tokens.windows(3).enumerate() {
109 if let Token::Operator(Operator::Less) = &ttt[0] {
110 if let Token::Whitespace(_) = &ttt[1] {
111 if let Token::Operator(Operator::Greater) = &ttt[2] {
112 locs.push((index, Token::Operator(Operator::NotEqual)));
113 }
114 if let Token::Operator(Operator::Equal) = &ttt[2] {
115 locs.push((index, Token::Operator(Operator::LessEqual)));
116 }
117 }
118 }
119 if let Token::Operator(Operator::Equal) = &ttt[0] {
120 if let Token::Whitespace(_) = &ttt[1] {
121 if let Token::Operator(Operator::Greater) = &ttt[2] {
122 locs.push((index, Token::Operator(Operator::GreaterEqual)));
123 }
124 if let Token::Operator(Operator::Less) = &ttt[2] {
125 locs.push((index, Token::Operator(Operator::LessEqual)));
126 }
127 }
128 }
129 if let Token::Operator(Operator::Greater) = &ttt[0] {
130 if let Token::Whitespace(_) = &ttt[1] {
131 if let Token::Operator(Operator::Less) = &ttt[2] {
132 locs.push((index, Token::Operator(Operator::NotEqual)));
133 }
134 if let Token::Operator(Operator::Equal) = &ttt[2] {
135 locs.push((index, Token::Operator(Operator::GreaterEqual)));
136 }
137 }
138 }
139 if let Token::Ident(Ident::Plain(go)) = &ttt[0] {
140 if go == "GO" {
141 if let Token::Whitespace(_) = ttt[1] {
142 if let Token::Word(Word::To) = ttt[2] {
143 locs.push((index, Token::Word(Word::Goto)));
144 }
145 if let Token::Ident(Ident::Plain(sub)) = &ttt[2] {
146 if sub == "SUB" {
147 locs.push((index, Token::Word(Word::Gosub)));
148 }
149 }
150 }
151 }
152 }
153 }
154 while let Some((index, token)) = locs.pop() {
155 tokens.splice(index..index + 3, Some(token));
156 }
157 }
158
159 fn collapse_doubles(tokens: &mut Vec<Token>) {
160 let mut locs: Vec<(usize, Token)> = vec![];
161 let mut tokens_iter = tokens.windows(2).enumerate();
162 while let Some((index, tt)) = tokens_iter.next() {
163 if let Token::Operator(Operator::Equal) = tt[0] {
164 if let Token::Operator(Operator::Greater) = tt[1] {
165 locs.push((index, Token::Operator(Operator::GreaterEqual)));
166 tokens_iter.next();
167 }
168 if let Token::Operator(Operator::Less) = tt[1] {
169 locs.push((index, Token::Operator(Operator::LessEqual)));
170 tokens_iter.next();
171 }
172 }
173 if let Token::Operator(Operator::Equal) = tt[1] {
174 if let Token::Operator(Operator::Greater) = tt[0] {
175 locs.push((index, Token::Operator(Operator::GreaterEqual)));
176 tokens_iter.next();
177 }
178 if let Token::Operator(Operator::Less) = tt[0] {
179 locs.push((index, Token::Operator(Operator::LessEqual)));
180 tokens_iter.next();
181 }
182 }
183 if let Token::Operator(Operator::Less) = tt[0] {
184 if let Token::Operator(Operator::Greater) = tt[1] {
185 locs.push((index, Token::Operator(Operator::NotEqual)));
186 tokens_iter.next();
187 }
188 }
189 }
190 while let Some((index, token)) = locs.pop() {
191 tokens.splice(index..index + 2, Some(token));
192 }
193 }
194
195 fn separate_words(tokens: &mut Vec<Token>) {
196 let mut locs: Vec<usize> = vec![];
197 for (index, tt) in tokens.windows(2).enumerate() {
198 if tt.iter().all(Token::is_word) {
199 locs.push(index);
200 }
201 }
202 while let Some(index) = locs.pop() {
203 tokens.insert(index + 1, Token::Whitespace(1));
204 }
205 }
206
207 fn trim_end(tokens: &mut Vec<Token>) {
208 if let Some(Token::Whitespace(_)) = tokens.last() {
209 tokens.pop();
210 }
211 if let Some(Token::Unknown(_)) = tokens.last() {
212 if let Some(Token::Unknown(s)) = tokens.pop() {
213 tokens.push(Token::Unknown(s.trim_end().into()));
214 }
215 }
216 }
217
218 fn whitespace(&mut self) -> Option<Token> {
219 let mut len = 0;
220 loop {
221 self.chars.pop_front();
222 len += 1;
223 if let Some(pk) = self.chars.front() {
224 if is_basic_whitespace(*pk) {
225 continue;
226 }
227 }
228 return Some(Token::Whitespace(len));
229 }
230 }
231
232 fn number(&mut self) -> Option<Token> {
233 let mut s = String::new();
234 let mut digits = 0;
235 let mut decimal = false;
236 let mut exp = false;
237 while let Some(mut ch) = self.chars.pop_front() {
238 if ch == 'e' {
239 ch = 'E'
240 }
241 if ch == 'd' {
242 ch = 'D'
243 }
244 s.push(ch);
245 if !exp && is_basic_digit(ch) {
246 digits += 1;
247 }
248 if ch == '.' {
249 decimal = true
250 }
251 if ch == 'D' {
252 digits += 8;
253 }
254 if ch == '!' {
255 return Some(Token::Literal(Literal::Single(s)));
256 }
257 if ch == '#' {
258 return Some(Token::Literal(Literal::Double(s)));
259 }
260 if ch == '%' {
261 return Some(Token::Literal(Literal::Integer(s)));
262 }
263 if let Some(pk) = self.chars.front().cloned() {
264 if ch == 'E' || ch == 'D' {
265 exp = true;
266 if pk == '+' || pk == '-' {
267 continue;
268 }
269 if !is_basic_digit(pk) {
270 exp = false;
271 s.pop();
272 self.chars.push_front(ch);
273 }
274 }
275 if is_basic_digit(pk) {
276 continue;
277 }
278 if !exp && !decimal && pk == '.' {
279 continue;
280 }
281 if !exp && pk == 'E' || pk == 'e' || pk == 'D' || pk == 'd' {
282 continue;
283 }
284 if pk == '!' || pk == '#' || pk == '%' {
285 continue;
286 }
287 }
288 break;
289 }
290 if digits > 7 {
291 return Some(Token::Literal(Literal::Double(s)));
292 }
293 if !exp && !decimal && s.parse::<i16>().is_ok() {
294 return Some(Token::Literal(Literal::Integer(s)));
295 }
296 Some(Token::Literal(Literal::Single(s)))
297 }
298
299 fn string(&mut self) -> Option<Token> {
300 let mut s = String::new();
301 self.chars.pop_front();
302 while let Some(ch) = self.chars.pop_front() {
303 if ch == '"' {
304 break;
305 }
306 s.push(ch);
307 }
308 Some(Token::Literal(Literal::String(s)))
309 }
310
    /// Scans a run that starts with an ASCII letter into word and/or
    /// identifier tokens.
    ///
    /// Characters are upper-cased as they are consumed. Every token produced
    /// is queued on `self.pending`; the token at the front of that queue is
    /// returned and any others are handed out by later calls to `next`.
    fn alphabetic(&mut self) -> Option<Token> {
        let mut s = String::new();
        // Set once a digit has been consumed into `s`.
        let mut digit = false;
        while let Some(ch) = self.chars.pop_front() {
            let ch = ch.to_ascii_uppercase();
            s.push(ch);
            if is_basic_digit(ch) {
                digit = true;
            }
            // A type sigil ends the identifier; the sigil is kept in the name.
            if ch == '$' {
                self.pending.push_back(Token::Ident(Ident::String(s)));
                break;
            } else if ch == '!' {
                self.pending.push_back(Token::Ident(Ident::Single(s)));
                break;
            } else if ch == '#' {
                self.pending.push_back(Token::Ident(Ident::Double(s)));
                break;
            } else if ch == '%' {
                self.pending.push_back(Token::Ident(Ident::Integer(s)));
                break;
            }
            if let Some(pk) = self.chars.front().cloned() {
                if is_basic_alphabetic(pk) {
                    // A letter after a digit starts a new token: the text so
                    // far is emitted as a plain identifier.
                    if digit {
                        self.pending.push_back(Token::Ident(Ident::Plain(s)));
                        break;
                    }
                    continue;
                }
                if is_basic_digit(pk) || pk == '$' || pk == '!' || pk == '#' || pk == '%' {
                    // Before a digit or sigil is appended, let the helper process
                    // the letters gathered so far. Presumably it queues any
                    // keywords it recognizes and returns the unconsumed
                    // remainder — confirm against `Token::scan_alphabetic`.
                    s = Token::scan_alphabetic(&mut self.pending, &s);
                    if s.is_empty() {
                        break;
                    }
                    continue;
                }
            }
            // End of the run (or of the input): whatever the helper leaves
            // over becomes a plain identifier.
            s = Token::scan_alphabetic(&mut self.pending, &s);
            if !s.is_empty() {
                self.pending.push_back(Token::Ident(Ident::Plain(s)));
            }
            break;
        }
        self.pending.pop_front()
    }
357
358 fn radix(&mut self) -> Option<Token> {
359 self.chars.pop_front();
360 let is_hex = if matches!(self.chars.front(), Some('H') | Some('h')) {
361 self.chars.pop_front();
362 true
363 } else {
364 false
365 };
366 let mut s = String::new();
367 while let Some(ch) = self.chars.pop_front() {
368 let ch = ch.to_ascii_uppercase();
369 if ('0'..='7').contains(&ch)
370 || (is_hex && (('8'..='9').contains(&ch) || ('A'..='F').contains(&ch)))
371 {
372 s.push(ch)
373 } else {
374 break;
375 }
376 }
377 if is_hex {
378 Some(Token::Literal(Literal::Hex(s)))
379 } else {
380 Some(Token::Literal(Literal::Octal(s)))
381 }
382 }
383
384 fn minutia(&mut self) -> Option<Token> {
385 let mut s = String::new();
386 while let Some(ch) = self.chars.pop_front() {
387 s.push(ch);
388 if let Some(token) = Token::match_minutia(&s) {
389 return Some(token);
390 }
391 if let Some(pk) = self.chars.front() {
392 if is_basic_alphabetic(*pk) {
393 break;
394 }
395 if is_basic_digit(*pk) {
396 break;
397 }
398 if is_basic_whitespace(*pk) {
399 break;
400 }
401 continue;
402 }
403 break;
404 }
405 Some(Token::Unknown(s))
406 }
407}