1use std::collections::HashMap;
10use std::io::BufRead;
11use std::sync::Arc;
12use crate::doc::*;
13use crate::error::*;
14use crate::utils::*;
15
/// A lexical token produced by the lexer.
///
/// The declaration order of the variants is significant: the derived
/// `PartialOrd` compares variants by their position in this list.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum Token {
    // --- Brackets ---
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `.[`
    DotLBracket,
    /// `.]`
    DotRBracket,

    // --- Operators ---
    /// `?`
    Ques,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `.*`
    DotStar,
    /// `./`
    DotSlash,
    /// `.+`
    DotPlus,
    /// `.-`
    DotMinus,
    /// `<`
    Lt,
    /// `>=`
    GtEq,
    /// `>`
    Gt,
    /// `<=`
    LtEq,
    /// `=`
    Eq,
    /// `==`
    EqEq,
    /// `!=`
    ExEq,
    /// `'`
    Apos,

    // --- Punctuation ---
    /// `.`
    Dot,
    /// `:`
    Colon,
    /// `::`
    ColonColon,
    /// `,`
    Comma,
    /// End of a source line, or an explicit `;`.
    Newline,

    // --- Keywords ---
    /// The `and` keyword.
    And,
    /// The `break` keyword.
    Break,
    /// The `by` keyword.
    By,
    /// The `continue` keyword.
    Continue,
    /// The `else` keyword.
    Else,
    /// The `end` keyword.
    End,
    /// The `false` keyword.
    False,
    /// The `fill` keyword.
    Fill,
    /// The `for` keyword.
    For,
    /// The `function` keyword.
    Function,
    /// The `if` keyword.
    If,
    /// The `in` keyword.
    In,
    /// The `inf` keyword.
    Inf,
    /// The `module` keyword.
    Module,
    /// The `nan` keyword.
    Nan,
    /// The `none` keyword.
    None,
    /// The `not` keyword.
    Not,
    /// The `or` keyword.
    Or,
    /// The `quit` keyword.
    Quit,
    /// The `return` keyword.
    Return,
    /// The `root` keyword.
    Root,
    /// The `to` keyword.
    To,
    /// The `true` keyword.
    True,
    /// The `while` keyword.
    While,

    // --- Literals and names ---
    /// An integer literal.
    Int(i64),
    /// A floating-point literal.
    Float(f32),
    /// A string literal, holding its decoded contents.
    String(String),
    /// An identifier that is not a keyword.
    Ident(String),
}
137
138pub struct Lexer<'a>
144{
145 path: Arc<String>,
146 line: u64,
147 eol_column: usize,
148 reader: &'a mut dyn BufRead,
149 line_tokens: Vec<Result<(Token, Pos)>>,
150 is_stopped: bool,
151 keywords: HashMap<String, Token>,
152 doc: Option<Option<String>>,
153}
154
155impl<'a> Lexer<'a>
156{
157 pub fn new_with_line_and_doc_flag(path: Arc<String>, reader: &'a mut dyn BufRead, line: u64, is_doc: bool) -> Self
163 {
164 let mut keywords: HashMap<String, Token> = HashMap::new();
165 keywords.insert(String::from("and"), Token::And);
166 keywords.insert(String::from("break"), Token::Break);
167 keywords.insert(String::from("by"), Token::By);
168 keywords.insert(String::from("continue"), Token::Continue);
169 keywords.insert(String::from("else"), Token::Else);
170 keywords.insert(String::from("end"), Token::End);
171 keywords.insert(String::from("false"), Token::False);
172 keywords.insert(String::from("fill"), Token::Fill);
173 keywords.insert(String::from("for"), Token::For);
174 keywords.insert(String::from("function"), Token::Function);
175 keywords.insert(String::from("if"), Token::If);
176 keywords.insert(String::from("in"), Token::In);
177 keywords.insert(String::from("inf"), Token::Inf);
178 keywords.insert(String::from("module"), Token::Module);
179 keywords.insert(String::from("nan"), Token::Nan);
180 keywords.insert(String::from("none"), Token::None);
181 keywords.insert(String::from("not"), Token::Not);
182 keywords.insert(String::from("or"), Token::Or);
183 keywords.insert(String::from("quit"), Token::Quit);
184 keywords.insert(String::from("return"), Token::Return);
185 keywords.insert(String::from("root"), Token::Root);
186 keywords.insert(String::from("to"), Token::To);
187 keywords.insert(String::from("true"), Token::True);
188 keywords.insert(String::from("while"), Token::While);
189 let doc = if is_doc {
190 Some(None)
191 } else {
192 None
193 };
194 Lexer {
195 path,
196 line,
197 eol_column: 0,
198 reader,
199 line_tokens: Vec::new(),
200 is_stopped: false,
201 keywords,
202 doc,
203 }
204 }
205
206 pub fn new_with_line(path: Arc<String>, reader: &'a mut dyn BufRead, line: u64) -> Self
210 { Self::new_with_line_and_doc_flag(path, reader, line, false) }
211
212 pub fn new_with_doc_flag(path: Arc<String>, reader: &'a mut dyn BufRead, is_doc: bool) -> Self
216 { Self::new_with_line_and_doc_flag(path, reader, 1, is_doc) }
217
218 pub fn new(path: Arc<String>, reader: &'a mut dyn BufRead) -> Self
222 { Self::new_with_doc_flag(path, reader, false) }
223
224 pub fn path(&self) -> &Arc<String>
226 { &self.path }
227
228 fn read_line_tokens(&mut self)
229 {
230 let mut line = String::new();
231 match self.reader.read_line(&mut line) {
232 Ok(0) => self.is_stopped = true,
233 Ok(_) => {
234 let path = self.path.clone();
235 let line_count = self.line;
236 let line_without_crlf = str_without_crlf(line.as_str());
237 let mut cs = line_without_crlf.chars().enumerate().map(|p| (p.1, Pos::new(path.clone(), line_count, p.0 + 1)));
238 let cs2: &mut dyn Iterator<Item = (char, Pos)> = &mut cs;
239 let mut cs3 = PushbackIter::new(cs2);
240 self.line_tokens.clear();
241 self.eol_column = line_without_crlf.chars().count() + 1;
242 while self.read_token(&mut cs3) {}
243 self.line_tokens.push(Ok((Token::Newline, Pos::new(path, line_count, self.eol_column))));
244 self.line_tokens.reverse();
245 self.line += 1;
246 },
247 Err(err) => {
248 self.line_tokens = vec![Err(Error::ParserIo(self.path.clone(), err))];
249 self.is_stopped = true;
250 },
251 }
252 }
253
254 fn skip_spaces(&self, cs: &mut PushbackIter<&mut dyn Iterator<Item = (char, Pos)>>)
255 {
256 loop {
257 match cs.next() {
258 Some((c, _)) if c.is_whitespace() => (),
259 Some((c, pos)) => {
260 cs.undo((c, pos));
261 break;
262 },
263 None => break,
264 }
265 }
266 }
267
268 fn read_one_or_more_digits(&mut self, cs: &mut PushbackIter<&mut dyn Iterator<Item = (char, Pos)>>, s: &mut String, s_pos: Option<&mut Pos>) -> bool
269 {
270 match cs.next() {
271 Some((c, pos)) if c.is_ascii_digit() => {
272 match s_pos {
273 Some(s_pos) => *s_pos = pos.clone(),
274 None => (),
275 }
276 s.push(c);
277 loop {
278 match cs.next() {
279 Some((c2, _)) if c2.is_ascii_digit() => s.push(c2),
280 Some((c2, pos2)) => {
281 cs.undo((c2, pos2));
282 break;
283 },
284 None => break,
285 }
286 }
287 true
288 },
289 Some((_, pos)) => {
290 self.line_tokens.push(Err(Error::Parser(pos, String::from("no decimal digits"))));
291 self.is_stopped = true;
292 false
293 },
294 None => {
295 self.line_tokens.push(Err(Error::Parser(Pos::new(self.path.clone(), self.line, self.eol_column), String::from("no decimal digits"))));
296 self.is_stopped = true;
297 false
298 },
299 }
300 }
301
302 fn read_one_or_more_hexdigits(&mut self, cs: &mut PushbackIter<&mut dyn Iterator<Item = (char, Pos)>>, s: &mut String, s_pos: Option<&mut Pos>) -> bool
303 {
304 match cs.next() {
305 Some((c, pos)) if c.is_ascii_hexdigit() => {
306 match s_pos {
307 Some(s_pos) => *s_pos = pos.clone(),
308 None => (),
309 }
310 s.push(c);
311 loop {
312 match cs.next() {
313 Some((c2, _)) if c2.is_ascii_hexdigit() => s.push(c2),
314 Some((c2, pos2)) => {
315 cs.undo((c2, pos2));
316 break;
317 },
318 None => break,
319 }
320 }
321 true
322 },
323 Some((_, pos)) => {
324 self.line_tokens.push(Err(Error::Parser(pos, String::from("no hexadecimal digits"))));
325 self.is_stopped = true;
326 false
327 },
328 None => {
329 self.line_tokens.push(Err(Error::Parser(Pos::new(self.path.clone(), self.line, self.eol_column), String::from("no hexadecimal digits"))));
330 self.is_stopped = true;
331 false
332 },
333 }
334 }
335
336 fn read_number_token(&mut self, cs: &mut PushbackIter<&mut dyn Iterator<Item = (char, Pos)>>) -> bool
337 {
338 let mut s = String::new();
339 let mut s_pos = Pos::new(self.path.clone(), self.line, 1);
340 let mut is_dot_or_exp = false;
341 match cs.next() {
342 Some((c @ '0', pos)) => {
343 match cs.next() {
344 Some(('X' | 'x', _)) => {
345 s_pos = pos;
346 if !self.read_one_or_more_hexdigits(cs, &mut s, None) {
347 return false;
348 }
349 match i64::from_str_radix(s.as_str(), 16) {
350 Ok(n) => self.line_tokens.push(Ok((Token::Int(n), s_pos))),
351 Err(_) => {
352 self.line_tokens.push(Err(Error::Parser(s_pos, String::from("invalid number"))));
353 self.is_stopped = true;
354 return false;
355 },
356 }
357 return true;
358 },
359 Some((c2, pos2)) => {
360 cs.undo((c2, pos2));
361 cs.undo((c, pos));
362 },
363 None => cs.undo((c, pos)),
364 }
365 },
366 Some((c, pos)) => cs.undo((c, pos)),
367 None => (),
368 }
369 if !self.read_one_or_more_digits(cs, &mut s, Some(&mut s_pos)) {
370 return false;
371 }
372 match cs.next() {
373 Some((c @ '.', _)) => {
374 s.push(c);
375 is_dot_or_exp = true;
376 if !self.read_one_or_more_digits(cs, &mut s, None) {
377 return false;
378 }
379 }
380 Some((c, pos)) => cs.undo((c, pos)),
381 None => (),
382 }
383 match cs.next() {
384 Some((c @ ('E' | 'e'), _)) => {
385 s.push(c);
386 is_dot_or_exp = true;
387 match cs.next() {
388 Some((c2 @ ('+' | '-'), _)) => s.push(c2),
389 Some((c2, pos2)) => cs.undo((c2, pos2)),
390 None => (),
391 }
392 if !self.read_one_or_more_digits(cs, &mut s, None) {
393 return false;
394 }
395 }
396 Some((c, pos)) => cs.undo((c, pos)),
397 None => (),
398 }
399 if is_dot_or_exp {
400 match s.parse::<f32>() {
401 Ok(n) => self.line_tokens.push(Ok((Token::Float(n), s_pos))),
402 Err(_) => {
403 self.line_tokens.push(Err(Error::Parser(s_pos, String::from("invalid number"))));
404 self.is_stopped = true;
405 return false;
406 },
407 }
408 } else {
409 match s.parse::<i64>() {
410 Ok(n) => self.line_tokens.push(Ok((Token::Int(n), s_pos))),
411 Err(_) => {
412 self.line_tokens.push(Err(Error::Parser(s_pos, String::from("invalid number"))));
413 self.is_stopped = true;
414 return false;
415 },
416 }
417 }
418 true
419 }
420
421 fn read_string_token(&mut self, cs: &mut PushbackIter<&mut dyn Iterator<Item = (char, Pos)>>) -> bool
422 {
423 match cs.next() {
424 Some(('"', pos)) => {
425 let mut s = String::new();
426 loop {
427 match cs.next() {
428 Some(('"', _)) => break,
429 Some(('\\', pos2)) => {
430 match cs.next() {
431 Some(('a', _)) => s.push('\x07'),
432 Some(('b', _)) => s.push('\x08'),
433 Some(('t', _)) => s.push('\t'),
434 Some(('n', _)) => s.push('\n'),
435 Some(('v', _)) => s.push('\x0b'),
436 Some(('f', _)) => s.push('\x0c'),
437 Some(('r', _)) => s.push('\r'),
438 Some((c3 @ ('U'| 'u'), _)) => {
439 let mut t = String::new();
440 let n = if c3 == 'U' { 6 } else { 4 };
441 for _ in 0..n {
442 match cs.next() {
443 Some((c4, _)) if c4.is_ascii_hexdigit() => t.push(c4),
444 _ => {
445 self.line_tokens.push(Err(Error::Parser(pos2, String::from("invalid unicode escape"))));
446 self.is_stopped = true;
447 return false;
448 }
449 }
450 }
451 match u32::from_str_radix(t.as_str(), 16) {
452 Ok(code) => {
453 match char::from_u32(code) {
454 Some(esc_c) => s.push(esc_c),
455 None => {
456 self.line_tokens.push(Err(Error::Parser(pos2, String::from("invalid unicode escape"))));
457 self.is_stopped = true;
458 return false;
459 },
460 }
461 },
462 Err(_) => {
463 self.line_tokens.push(Err(Error::Parser(pos2, String::from("invalid unicode escape"))));
464 self.is_stopped = true;
465 return false;
466 },
467 }
468 },
469 Some((c3 @ ('0'..='7'), _)) => {
470 let mut t = String::new();
471 t.push(c3);
472 for _ in 0..2 {
473 match cs.next() {
474 Some((c4 @ ('0'..='7'), _)) => t.push(c4),
475 Some((c4, pos4)) => {
476 cs.undo((c4, pos4));
477 break;
478 },
479 None => {
480 self.line_tokens.push(Err(Error::Parser(pos2, String::from("unclosed string"))));
481 self.is_stopped = true;
482 return false;
483 }
484 }
485 }
486 match u32::from_str_radix(t.as_str(), 8) {
487 Ok(code) => {
488 match char::from_u32(code) {
489 Some(esc_c) => s.push(esc_c),
490 None => {
491 self.line_tokens.push(Err(Error::Parser(pos2, String::from("invalid octal escape"))));
492 self.is_stopped = true;
493 return false;
494 },
495 }
496 },
497 Err(_) => {
498 self.line_tokens.push(Err(Error::Parser(pos2, String::from("invalid octal escape"))));
499 self.is_stopped = true;
500 return false;
501 },
502 }
503 },
504 Some((c3, _)) => s.push(c3),
505 None => {
506 self.line_tokens.push(Err(Error::Parser(pos, String::from("unclosed string"))));
507 self.is_stopped = true;
508 return false;
509 },
510 }
511 },
512 Some((c2, _)) => s.push(c2),
513 None => {
514 self.line_tokens.push(Err(Error::Parser(pos, String::from("unclosed string"))));
515 self.is_stopped = true;
516 return false;
517 },
518 }
519 }
520 self.line_tokens.push(Ok((Token::String(s), pos)));
521 true
522 },
523 Some((_, pos)) => {
524 self.line_tokens.push(Err(Error::Parser(pos, String::from("invalid string"))));
525 self.is_stopped = true;
526 false
527 },
528 None => {
529 self.line_tokens.push(Err(Error::Parser(Pos::new(self.path.clone(), self.line, self.eol_column), String::from("invalid string"))));
530 self.is_stopped = true;
531 false
532 },
533 }
534 }
535
536 fn read_ident_chars(&mut self, cs: &mut PushbackIter<&mut dyn Iterator<Item = (char, Pos)>>, s: &mut String)
537 {
538 loop {
539 match cs.next() {
540 Some((c, _)) if c.is_alphanumeric() || c == '_' => s.push(c),
541 Some((c, pos)) => {
542 cs.undo((c, pos));
543 break;
544 },
545 None => break,
546 }
547 }
548 }
549
550 fn read_keyword_or_ident_token(&mut self, cs: &mut PushbackIter<&mut dyn Iterator<Item = (char, Pos)>>) -> bool
551 {
552 match cs.next() {
553 Some((c, pos)) if c.is_alphabetic() || c == '_' => {
554 let mut s = String::new();
555 s.push(c);
556 self.read_ident_chars(cs, &mut s);
557 match self.keywords.get(&s) {
558 Some(keyword) => self.line_tokens.push(Ok((keyword.clone(), pos))),
559 None => self.line_tokens.push(Ok((Token::Ident(s), pos))),
560 }
561 true
562 },
563 Some((_, pos)) => {
564 self.line_tokens.push(Err(Error::Parser(pos, String::from("invalid keyword or identifier"))));
565 self.is_stopped = true;
566 false
567 },
568 None => {
569 self.line_tokens.push(Err(Error::Parser(Pos::new(self.path.clone(), self.line, self.eol_column), String::from("invalid keyword or identifier"))));
570 self.is_stopped = true;
571 false
572 },
573 }
574 }
575
576 fn read_token(&mut self, cs: &mut PushbackIter<&mut dyn Iterator<Item = (char, Pos)>>) -> bool
577 {
578 self.skip_spaces(cs);
579 match cs.next() {
580 Some(('#' | '%', _)) => {
581 match cs.next() {
582 Some(('#' | '%', _)) => {
583 match &mut self.doc {
584 Some(doc) => {
585 match cs.next() {
586 Some((c3, _)) if c3.is_whitespace() => (),
587 Some((c3, pos3)) => cs.undo((c3, pos3)),
588 None => (),
589 }
590 let mut doc_line: String = cs.map(|p| p.0).collect();
591 doc_line.push('\n');
592 match doc {
593 Some(doc) => doc.push_str(doc_line.as_str()),
594 None => *doc = Some(doc_line),
595 }
596 },
597 None => (),
598 }
599 },
600 _ => (),
601 }
602 return false
603 },
604 Some(('(', pos)) => self.line_tokens.push(Ok((Token::LParen, pos))),
605 Some((')', pos)) => self.line_tokens.push(Ok((Token::RParen, pos))),
606 Some(('[', pos)) => self.line_tokens.push(Ok((Token::LBracket, pos))),
607 Some((']', pos)) => self.line_tokens.push(Ok((Token::RBracket, pos))),
608 Some(('{', pos)) => self.line_tokens.push(Ok((Token::LBrace, pos))),
609 Some(('}', pos)) => self.line_tokens.push(Ok((Token::RBrace, pos))),
610 Some(('?', pos)) => self.line_tokens.push(Ok((Token::Ques, pos))),
611 Some(('*', pos)) => self.line_tokens.push(Ok((Token::Star, pos))),
612 Some(('/', pos)) => self.line_tokens.push(Ok((Token::Slash, pos))),
613 Some(('+', pos)) => self.line_tokens.push(Ok((Token::Plus, pos))),
614 Some(('-', pos)) => self.line_tokens.push(Ok((Token::Minus, pos))),
615 Some(('<', pos)) => {
616 match cs.next() {
617 Some(('=', _)) => self.line_tokens.push(Ok((Token::LtEq, pos))),
618 Some((c2, pos2)) => {
619 self.line_tokens.push(Ok((Token::Lt, pos)));
620 cs.undo((c2, pos2));
621 },
622 None => self.line_tokens.push(Ok((Token::Lt, pos))),
623 }
624 },
625 Some(('>', pos)) => {
626 match cs.next() {
627 Some(('=', _)) => self.line_tokens.push(Ok((Token::GtEq, pos))),
628 Some((c2, pos2)) => {
629 self.line_tokens.push(Ok((Token::Gt, pos)));
630 cs.undo((c2, pos2));
631 },
632 None => self.line_tokens.push(Ok((Token::Gt, pos))),
633 }
634 },
635 Some(('=', pos)) => {
636 match cs.next() {
637 Some(('=', _)) => self.line_tokens.push(Ok((Token::EqEq, pos))),
638 Some((c2, pos2)) => {
639 self.line_tokens.push(Ok((Token::Eq, pos)));
640 cs.undo((c2, pos2));
641 },
642 None => self.line_tokens.push(Ok((Token::Eq, pos))),
643 }
644 },
645 Some(('!', pos)) => {
646 match cs.next() {
647 Some(('=', _)) => self.line_tokens.push(Ok((Token::ExEq, pos))),
648 _ => {
649 self.line_tokens.push(Err(Error::Parser(pos, String::from("unexpected character"))));
650 self.is_stopped = true;
651 return false;
652 },
653 }
654 },
655 Some(('\'', pos)) => self.line_tokens.push(Ok((Token::Apos, pos))),
656 Some(('.', pos)) => {
657 match cs.next() {
658 Some(('[', _)) => self.line_tokens.push(Ok((Token::DotLBracket, pos))),
659 Some((']', _)) => self.line_tokens.push(Ok((Token::DotRBracket, pos))),
660 Some(('*', _)) => self.line_tokens.push(Ok((Token::DotStar, pos))),
661 Some(('/', _)) => self.line_tokens.push(Ok((Token::DotSlash, pos))),
662 Some(('+', _)) => self.line_tokens.push(Ok((Token::DotPlus, pos))),
663 Some(('-', _)) => self.line_tokens.push(Ok((Token::DotMinus, pos))),
664 Some((c2, pos2)) => {
665 self.line_tokens.push(Ok((Token::Dot, pos)));
666 cs.undo((c2, pos2));
667 },
668 None => self.line_tokens.push(Ok((Token::Dot, pos))),
669 }
670 },
671 Some((':', pos)) => {
672 match cs.next() {
673 Some((':', _)) => self.line_tokens.push(Ok((Token::ColonColon, pos))),
674 Some((c2, pos2)) => {
675 self.line_tokens.push(Ok((Token::Colon, pos)));
676 cs.undo((c2, pos2));
677 },
678 None => self.line_tokens.push(Ok((Token::Colon, pos))),
679 }
680 },
681 Some((',', pos)) => self.line_tokens.push(Ok((Token::Comma, pos))),
682 Some((';', pos)) => self.line_tokens.push(Ok((Token::Newline, pos))),
683 Some((c @ '"', pos)) => {
684 cs.undo((c, pos));
685 return self.read_string_token(cs);
686 },
687 Some((c, pos)) if c.is_ascii_digit() => {
688 cs.undo((c, pos));
689 return self.read_number_token(cs);
690 },
691 Some((c, pos)) if c.is_alphabetic() || c == '_' => {
692 cs.undo((c, pos));
693 return self.read_keyword_or_ident_token(cs);
694 },
695 Some((_, pos)) => {
696 self.line_tokens.push(Err(Error::Parser(pos, String::from("unexpected character"))));
697 self.is_stopped = true;
698 return false;
699 },
700 None => return false,
701 }
702 true
703 }
704}
705
706impl<'a> Iterator for Lexer<'a>
707{
708 type Item = Result<(Token, Pos)>;
709
710 fn next(&mut self) -> Option<Self::Item>
711 {
712 if self.line_tokens.is_empty() {
713 if !self.is_stopped {
714 self.read_line_tokens();
715 } else {
716 return None;
717 }
718 }
719 match self.line_tokens.pop() {
720 Some(Ok(token)) => Some(Ok(token)),
721 Some(Err(err)) => {
722 self.line_tokens.clear();
723 Some(Err(err))
724 },
725 None => None,
726 }
727 }
728}
729
730impl<'a> DocIterator for Lexer<'a>
731{
732 fn take_doc(&mut self) -> Option<String>
733 {
734 match &mut self.doc {
735 Some(doc) => doc.take(),
736 None => None,
737 }
738 }
739}
740
741#[cfg(test)]
742mod tests;