1use std::{
2 iter::Peekable,
3 mem,
4 ops::{Deref, DerefMut},
5 str::CharIndices,
6};
7
8pub use error_reporter::ErrorReporter;
9
10mod character_tests;
11mod error_reporter;
12mod fstring_tests;
13mod peakable;
14mod tests;
15
/// Every kind of token the lexer can produce.
#[derive(Debug, Hash, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    // Single-character punctuation.
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    OpenBracket,
    CloseBracket,
    Comma,
    Dot,
    Minus,
    Plus,
    Semicolon,
    Slash,
    Star,
    Colon,
    Percent,
    Pipe,
    Bang,
    Question,
    At,
    Dollar,
    Underscore,
    // Multi-character operators.
    StarStar,
    ColonColon,
    Arrow,
    FatArrow,
    PipeArrow,
    DotDot,
    DotDotEq,
    NotEqual,
    Equal,
    EqualEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,
    PlusEqual,
    MinusEqual,
    StarEqual,
    SlashEqual,
    PercentEqual,
    // Identifier-like tokens and literals.
    Identifier,
    Error,
    String,
    FString,
    RawString,
    Number,
    Doc,
    // Keywords.
    And,
    Break,
    Class,
    Const,
    Continue,
    Else,
    Enum,
    False,
    For,
    Fn,
    If,
    In,
    Match,
    Nil,
    Not,
    Or,
    Pub,
    Raise,
    Return,
    Super,
    Self_,
    True,
    Let,
    Use,
    While,
    // AI-related keywords.
    AI,
    Prompt,
    Agent,
    // Sentinels: lexing error and end of input.
    Invalid,
    Eof,
}
112
/// A single lexed token: a slice of the source plus its kind and line.
#[derive(Debug, Hash, Copy, Clone, Eq, PartialEq)]
pub struct Token<'a> {
    // Exact source text this token was scanned from (borrowed from the input).
    pub lexeme: &'a str,
    // 1-based line number the lexer reported when the token was produced;
    // for multi-line tokens this is the line where scanning finished.
    pub line: u32,
    // What kind of token this is.
    pub kind: TokenType,
}
123
124impl Default for Token<'_> {
125 fn default() -> Self {
126 Self {
127 kind: TokenType::Eof,
128 lexeme: "",
129 line: 1,
130 }
131 }
132}
133
134impl<'a> Token<'a> {
135 pub fn new(kind: TokenType, origin: &'a str, line: u32) -> Self {
137 Token {
138 kind,
139 lexeme: origin,
140 line,
141 }
142 }
143
144 pub fn is_function_def_keyword(&self) -> bool {
145 matches!(self.kind, TokenType::Fn | TokenType::AI | TokenType::Pub)
146 }
147
148 pub fn is_error_type(&self) -> bool {
149 self.kind == TokenType::Error
150 }
151
152 pub fn is_literal_token(&self) -> bool {
153 matches!(
154 self.kind,
155 TokenType::Number
156 | TokenType::String
157 | TokenType::True
158 | TokenType::False
159 | TokenType::Nil
160 )
161 }
162
163 pub fn is_synchronize_keyword(&self) -> bool {
168 matches!(
169 self.kind,
170 TokenType::Agent
171 | TokenType::AI
172 | TokenType::Class
173 | TokenType::Const
174 | TokenType::Enum
175 | TokenType::Fn
176 | TokenType::For
177 | TokenType::If
178 | TokenType::Let
179 | TokenType::Match
180 | TokenType::Pub
181 | TokenType::Raise
182 | TokenType::Return
183 | TokenType::Use
184 | TokenType::While
185 )
186 }
187
188 pub fn is_expr_start(&self) -> bool {
190 matches!(
192 self.kind,
193 TokenType::Number
194 | TokenType::String
195 | TokenType::True
196 | TokenType::False
197 | TokenType::Nil
198 | TokenType::Identifier
199 | TokenType::OpenParen
200 | TokenType::OpenBracket
201 | TokenType::Match
202 | TokenType::Minus
203 | TokenType::Not
204 | TokenType::Self_
205 | TokenType::Super
206 | TokenType::Pipe
207 )
208 }
209}
210
/// Hand-written lexer that walks the source one `char` at a time.
struct Lexer<'a> {
    // Full source text; token lexemes borrow slices of it.
    source: &'a str,
    // Char iterator with one-char lookahead over `source`.
    iter: Peekable<CharIndices<'a>>,
    // Byte offset where the token currently being scanned starts.
    start: usize,
    // Byte offset one past the last consumed char.
    current: usize,
    // Current line number, 1-based; bumped on each consumed '\n'.
    line: u32,
    // Set once an `Eof` token has been produced (see the `Iterator` impl).
    is_eof: bool,
}
226
/// Parser-facing token stream: wraps a `Lexer` with one-token lookahead,
/// a current/previous token pair, and error reporting (exposed through
/// `Deref`/`DerefMut` to `ErrorReporter`).
pub struct Scanner<'a> {
    lexer: peakable::Peekable<Lexer<'a>>,
    error_reporter: ErrorReporter,
    // Token currently being considered by the parser.
    pub current: Token<'a>,
    // Most recently consumed token.
    pub previous: Token<'a>,
}
233
// Deref to the embedded reporter so its methods can be called directly
// on the scanner (e.g. `self.error_at(...)`, `self.panic_mode`).
impl Deref for Scanner<'_> {
    type Target = ErrorReporter;

    fn deref(&self) -> &Self::Target {
        &self.error_reporter
    }
}
241
// Mutable counterpart: lets scanner methods mutate the reporter in place.
impl DerefMut for Scanner<'_> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.error_reporter
    }
}
247
248impl<'a> Lexer<'a> {
249 fn new(source: &'a str) -> Self {
251 Self {
252 source,
253 iter: source.char_indices().peekable(),
254 start: 0,
255 current: 0,
256 line: 1,
257 is_eof: false,
258 }
259 }
260
261 fn advance(&mut self) -> Option<char> {
263 match self.iter.next() {
264 Some((pos, ch)) => {
265 self.current = pos + ch.len_utf8();
266 Some(ch)
267 }
268 None => None,
269 }
270 }
271
272 fn peek(&mut self) -> Option<char> {
274 self.iter.peek().map(|&(_, c)| c)
275 }
276
277 fn check_next(&self, n: usize) -> Option<&str> {
279 let mut chars = self.source[self.current..].char_indices();
280 match chars.nth(n - 1) {
281 Some((end_offset, ch)) => {
282 let end = self.current + end_offset + ch.len_utf8();
283 if end <= self.source.len() {
284 Some(&self.source[self.current..end])
285 } else {
286 None
287 }
288 }
289 None => None,
290 }
291 }
292
293 fn next2(&mut self) -> &str {
295 let mut chars = self.source[self.current..].char_indices();
296 match chars.nth(1) {
297 Some((end_offset, ch)) => {
298 let end = self.current + end_offset + ch.len_utf8();
299 &self.source[self.current..end]
300 }
301 None => &self.source[self.current..],
302 }
303 }
304
305 fn skip_white_spaces(&mut self) {
307 while let Some(c) = self.peek() {
308 match c {
309 ' ' | '\r' | '\t' => {
310 self.advance();
311 }
312 '\n' => {
313 self.line += 1;
314 self.advance();
315 }
316 '/' => {
317 if self.next2() == "//" {
318 while matches!(self.peek(), Some(c) if c != '\n') {
319 self.advance();
320 }
321 } else {
322 return;
323 }
324 }
325 _ => return,
326 }
327 }
328 }
329
330 fn make_token(&self, kind: TokenType) -> Token<'a> {
332 Token {
333 kind,
334 lexeme: &self.source[self.start..self.current],
335 line: self.line,
336 }
337 }
338
    /// Scans a `"""docstring"""`; called with the first '"' already
    /// consumed and two more quotes known to follow.
    ///
    /// Returns a `Doc` token with the trimmed content, or `Invalid` when
    /// the input ends before a terminator.
    fn scan_docstring(&mut self) -> Token<'a> {
        // Consume the two remaining opening quotes plus the first content
        // char, then move `start` past the `"""` so the lexeme begins at
        // the content.
        for _ in 0..3 {
            self.advance();
        }
        self.start += 3;

        loop {
            match self.peek() {
                Some(c) => {
                    // NOTE(review): the terminator test matches *two*
                    // consecutive quotes (`c` is the first char of
                    // `next2()`'s result), so a literal `""` inside a
                    // docstring ends it early — confirm this is intended.
                    if c == '"' && self.next2() == "\"\"" {
                        break;
                    }
                    if c == '\n' {
                        self.line += 1;
                    }
                    self.advance();
                }
                None => {
                    return Token::new(TokenType::Invalid, "Unterminated docstring.", self.line);
                }
            }
        }

        // `start == current - 1` means no content sits between the
        // opening and closing quote runs: emit an empty-lexeme token.
        let token = if self.start == self.current - 1 {
            Token {
                kind: TokenType::Doc,
                lexeme: "",
                line: self.line,
            }
        } else {
            let mut token = self.make_token(TokenType::Doc);
            // Surrounding whitespace/newlines are not part of the doc text.
            token.lexeme = token.lexeme.trim();
            token
        };

        // Consume the run of closing quotes.
        while let Some(c) = self.peek() {
            if c == '"' {
                self.advance();
            } else {
                break;
            }
        }
        token
    }
389
390 fn scan_number(&mut self) -> Token<'a> {
392 self.consume_digits();
393 if self.source[self.current..].len() >= 2 {
395 let mut next_two_chars = self.source[self.current..self.current + 2].chars();
396 let (maybe_dot, maybe_digit) = (next_two_chars.next(), next_two_chars.next());
397 if maybe_dot == Some('.') && matches!(maybe_digit, Some(c) if c.is_ascii_digit()) {
398 self.advance();
400
401 self.consume_digits();
402 }
403 }
404
405 self.make_token(TokenType::Number)
406 }
407
408 fn consume_digits(&mut self) {
410 while matches!(self.peek(), Some(c) if c.is_ascii_digit()) {
411 self.advance();
412 }
413 }
414
415 fn scan_identifier(&mut self) -> Token<'a> {
417 while matches!(self.peek(), Some(c) if c.is_alphanumeric() || c == '_') {
418 self.advance();
419 }
420
421 if self.peek() == Some('!') {
423 self.advance();
424 return self.make_token(TokenType::Error);
426 }
427
428 let text = &self.source[self.start..self.current];
429 let kind = match text {
430 "ai" => TokenType::AI,
431 "agent" => TokenType::Agent,
432 "and" => TokenType::And,
433 "break" => TokenType::Break,
434 "class" => TokenType::Class,
435 "const" => TokenType::Const,
436 "continue" => TokenType::Continue,
437 "else" => TokenType::Else,
438 "enum" => TokenType::Enum,
439 "false" => TokenType::False,
440 "for" => TokenType::For,
441 "fn" => TokenType::Fn,
442 "if" => TokenType::If,
443 "in" => TokenType::In,
444 "match" => TokenType::Match,
445 "nil" => TokenType::Nil,
446 "not" => TokenType::Not,
447 "or" => TokenType::Or,
448 "prompt" => TokenType::Prompt,
449 "pub" => TokenType::Pub,
450 "return" => TokenType::Return,
451 "raise" => TokenType::Raise,
452 "super" => TokenType::Super,
453 "self" => TokenType::Self_,
454 "true" => TokenType::True,
455 "let" => TokenType::Let,
456 "use" => TokenType::Use,
457 "while" => TokenType::While,
458 _ => TokenType::Identifier,
459 };
460
461 self.make_token(kind)
462 }
463
464 fn scan_token(&mut self) -> Token<'a> {
466 self.skip_white_spaces();
467
468 self.start = self.current;
469
470 let c = match self.advance() {
471 Some(c) => c,
472 None => return Token::new(TokenType::Eof, "", self.line),
473 };
474
475 match c {
476 '(' => self.make_token(TokenType::OpenParen),
477 ')' => self.make_token(TokenType::CloseParen),
478 '[' => self.make_token(TokenType::OpenBracket),
479 ']' => self.make_token(TokenType::CloseBracket),
480 '{' => self.make_token(TokenType::OpenBrace),
481 '}' => self.make_token(TokenType::CloseBrace),
482 ';' => self.make_token(TokenType::Semicolon),
483 ',' => self.make_token(TokenType::Comma),
484 '@' => self.make_token(TokenType::At),
485 '$' => self.make_token(TokenType::Dollar),
486 '?' => self.make_token(TokenType::Question),
487 '_' => {
488 if !matches!(self.peek(), Some(c) if c.is_alphanumeric() || c == '_') {
490 self.make_token(TokenType::Underscore)
491 } else {
492 self.scan_identifier()
494 }
495 }
496 '.' => {
497 let kind = if self.peek() == Some('.') {
498 self.advance();
499 if self.peek() == Some('=') {
500 self.advance();
501 TokenType::DotDotEq
502 } else {
503 TokenType::DotDot
504 }
505 } else {
506 TokenType::Dot
507 };
508 self.make_token(kind)
509 }
510 '|' => {
511 let kind = if self.peek() == Some('>') {
512 self.advance();
513 TokenType::PipeArrow
514 } else {
515 TokenType::Pipe
516 };
517 self.make_token(kind)
518 }
519 '-' => {
520 let p = self.peek();
521 let kind = if p == Some('>') {
522 self.advance();
523 TokenType::Arrow
524 } else if p == Some('=') {
525 self.advance();
526 TokenType::MinusEqual
527 } else {
528 TokenType::Minus
529 };
530 self.make_token(kind)
531 }
532 '+' => {
533 let kind = if self.peek() == Some('=') {
534 self.advance();
535 TokenType::PlusEqual
536 } else {
537 TokenType::Plus
538 };
539 self.make_token(kind)
540 }
541 '/' => {
542 let kind = if self.peek() == Some('=') {
543 self.advance();
544 TokenType::SlashEqual
545 } else {
546 TokenType::Slash
547 };
548 self.make_token(kind)
549 }
550 '*' => {
551 let p = self.peek();
552 let kind = if p == Some('*') {
553 self.advance();
554 TokenType::StarStar
555 } else if p == Some('=') {
556 self.advance();
557 TokenType::StarEqual
558 } else {
559 TokenType::Star
560 };
561 self.make_token(kind)
562 }
563 ':' => {
564 let kind = if self.peek() == Some(':') {
565 self.advance();
566 TokenType::ColonColon
567 } else {
568 TokenType::Colon
569 };
570 self.make_token(kind)
571 }
572 '%' => {
573 let kind = if self.peek() == Some('=') {
574 self.advance();
575 TokenType::PercentEqual
576 } else {
577 TokenType::Percent
578 };
579 self.make_token(kind)
580 }
581 '!' => {
582 let kind = if self.peek() == Some('=') {
583 self.advance();
584 TokenType::NotEqual
585 } else {
586 TokenType::Bang
587 };
588 self.make_token(kind)
589 }
590 '=' => {
591 let p = self.peek();
592 let kind = if p == Some('=') {
593 self.advance();
594 TokenType::EqualEqual
595 } else if p == Some('>') {
596 self.advance();
597 TokenType::FatArrow
598 } else {
599 TokenType::Equal
600 };
601 self.make_token(kind)
602 }
603 '<' => {
604 let kind = if self.peek() == Some('=') {
605 self.advance();
606 TokenType::LessEqual
607 } else {
608 TokenType::Less
609 };
610 self.make_token(kind)
611 }
612 '>' => {
613 let kind = if self.peek() == Some('=') {
614 self.advance();
615 TokenType::GreaterEqual
616 } else {
617 TokenType::Greater
618 };
619 self.make_token(kind)
620 }
621 '"' => {
622 if let Some("\"\"") = self.check_next(2) {
624 self.scan_docstring()
625 } else {
626 self.scan_string()
628 }
629 }
630 'f' => {
631 if self.peek() == Some('"') {
632 self.scan_fstring()
633 } else {
634 self.scan_identifier()
636 }
637 }
638 'r' => {
639 if self.peek() == Some('"') {
641 self.advance(); let mut token = self.scan_string();
643 token.kind = TokenType::RawString;
645 token
646 } else {
647 self.scan_identifier()
648 }
649 }
650 c if c.is_ascii_digit() => self.scan_number(),
651 c if c.is_alphabetic() => self.scan_identifier(),
652 _ => Token::new(TokenType::Invalid, "Unexpected character.", self.line),
653 }
654 }
655
656 fn scan_fstring(&mut self) -> Token<'a> {
657 self.advance(); let start_content = self.current; let mut brace_depth = 0;
662
663 while let Some((end_pos, ch)) = self.iter.peek().copied() {
664 match ch {
665 '{' => {
666 brace_depth += 1;
667 self.advance();
668 }
669 '}' => {
670 if brace_depth > 0 {
671 brace_depth -= 1;
672 } else {
673 self.advance();
677 }
678 }
679 '\\' => {
680 self.advance(); self.advance(); }
683 '"' => {
684 let content = &self.source[start_content..end_pos];
685 self.advance(); return Token::new(TokenType::FString, content, self.line);
687 }
688 '\n' => {
689 self.line += 1;
690 self.advance();
691 }
692 _ => {
693 self.advance();
694 }
695 }
696 }
697
698 Token::new(TokenType::Invalid, "Unterminated f-string.", self.line)
699 }
700
701 fn scan_string(&mut self) -> Token<'a> {
703 let start_content = self.current; while let Some((end_pos, ch)) = self.iter.peek().copied() {
706 match ch {
707 '\\' => {
708 self.advance(); self.advance(); }
711 '"' => {
712 let content = &self.source[start_content..end_pos];
713 self.advance(); return Token::new(TokenType::String, content, self.line);
715 }
716 '\n' => {
717 self.line += 1;
718 self.advance();
719 }
720 _ => {
721 self.advance();
722 }
723 }
724 }
725
726 Token::new(TokenType::Invalid, "Unterminated string.", self.line)
727 }
728}
729
730impl Lexer<'_> {
731 fn read_raw_script(&mut self) -> Result<String, String> {
732 let mut script = String::new();
733 let mut brace_count = 1;
734
735 while let Some((_, ch)) = self.iter.peek() {
736 match ch {
737 '{' => {
738 brace_count += 1;
739 script.push('{');
740 }
741 '}' => {
742 brace_count -= 1;
743 if brace_count == 0 {
744 break;
745 } else {
746 script.push('}');
747 }
748 }
749 '\n' => {
750 script.push('\n');
751 self.line += 1;
752 }
753 ch => {
754 script.push(*ch);
755 }
756 }
757 self.advance();
758 }
759
760 if brace_count > 0 {
761 return Err("Unclosed script block".to_string());
762 }
763
764 Ok(script.trim().to_owned())
765 }
766}
767
768impl<'a> Iterator for Lexer<'a> {
769 type Item = Token<'a>;
770
771 fn next(&mut self) -> Option<Self::Item> {
772 if self.is_eof {
773 None
774 } else {
775 let token = self.scan_token();
776 self.is_eof = token.kind == TokenType::Eof;
777 Some(token)
778 }
779 }
780}
781
// Unwraps the lookahead adapter back into the raw lexer so the scanner
// can temporarily drive it directly (see `Scanner::read_raw_script`).
impl<'a> From<peakable::Peekable<Lexer<'a>>> for Lexer<'a> {
    fn from(p: peakable::Peekable<Lexer<'a>>) -> Self {
        p.iter
    }
}
787
788impl<'a> Scanner<'a> {
789 pub fn new(source: &'a str) -> Self {
790 Scanner {
791 lexer: peakable::Peekable::new(Lexer::new(source)),
792 current: Token::default(),
793 previous: Token::default(),
794 error_reporter: ErrorReporter::new(),
795 }
796 }
797
798 pub fn read_raw_script(&mut self) -> Result<String, String> {
799 let mut lexer = Lexer::from(mem::replace(
800 &mut self.lexer,
801 peakable::Peekable::new(Lexer::new("")),
802 ));
803 let script = format!("{} {}", self.current.lexeme, lexer.read_raw_script()?);
804
805 self.lexer = peakable::Peekable::new(lexer);
806 self.advance();
808 Ok(script)
809 }
810
811 pub fn escape_string(&mut self, input: &str) -> Option<String> {
812 let mut escaped_string = String::new();
813 let mut chars = input.chars();
814 while let Some(ch) = chars.next() {
815 if ch == '\\' {
816 let c = match chars.next() {
817 Some('n') => '\n',
818 Some('r') => '\r',
819 Some('t') => '\t',
820 Some('\\') => '\\',
821 Some('\'') => '\'',
822 Some('\"') => '\"',
823 Some('0') => '\0',
824 Some(ch) => {
825 self.error(&format!("Invalid escape sequence: \\{}", ch));
826 return None;
827 }
828 None => {
829 self.error("Unterminated escape sequence");
830 return None;
831 }
832 };
833 escaped_string.push(c);
834 } else {
835 escaped_string.push(ch);
836 }
837 }
838 Some(escaped_string)
839 }
840
    /// Moves to the next token: `current` becomes `previous`, then the
    /// lexer is pumped until a non-`Invalid` token arrives.
    ///
    /// Each `Invalid` token is reported as an error (its lexeme carries
    /// the message) and skipped.
    pub fn advance(&mut self) {
        // `take` leaves the default (`Eof`) token in `current`.
        self.previous = mem::take(&mut self.current);

        while let Some(token) = self.lexer.next() {
            self.current = token;
            if self.current.kind != TokenType::Invalid {
                break;
            }
            self.error_at_current(self.current.lexeme);
        }
    }
852
853 pub fn consume(&mut self, kind: TokenType, message: &str) {
854 if self.check(kind) {
855 self.advance();
856 return;
857 }
858 self.error_at_current(message);
859 }
860
861 pub fn consume_either(&mut self, k1: TokenType, k2: TokenType, message: &str) {
862 if self.check_either(k1, k2) {
863 self.advance();
864 return;
865 }
866 self.error_at_current(message);
867 }
868
869 pub fn match_token(&mut self, kind: TokenType) -> bool {
870 if self.check(kind) {
871 self.advance();
872 true
873 } else {
874 false
875 }
876 }
877
878 pub fn peek_next(&mut self) -> Option<Token<'a>> {
879 self.lexer.peek().copied()
880 }
881
882 pub fn check(&self, kind: TokenType) -> bool {
883 self.current.kind == kind
884 }
885 pub fn check_either(&self, k1: TokenType, k2: TokenType) -> bool {
886 self.check(k1) || self.check(k2)
887 }
888
889 pub fn check_identifier(&self, lexme: &str) -> bool {
890 self.current.kind == TokenType::Identifier && self.current.lexeme == lexme
891 }
892
893 pub fn check_next(&mut self, kind: TokenType) -> bool {
894 self.peek_next().map(|t| t.kind == kind) == Some(true)
895 }
896
897 pub fn is_at_end(&self) -> bool {
898 self.current.kind == TokenType::Eof
899 }
900
901 pub fn error_at_current(&mut self, message: &str) {
902 let current = self.current;
903 self.error_at(current, message);
904 }
905
906 pub fn error(&mut self, message: &str) {
907 let previous = self.previous;
908 self.error_at(previous, message);
909 }
910
911 pub fn synchronize(&mut self) {
912 self.panic_mode = false;
913
914 while !self.is_at_end() {
915 if self.previous.kind == TokenType::Semicolon {
916 return;
917 }
918
919 if self.current.is_synchronize_keyword() {
920 return;
921 }
922 self.advance();
923 }
924 }
925}