1use crate::{SyntaxKind, SyntaxKind::*};
24use logos::Logos;
25use rowan::{TextRange, TextSize};
26
/// A single lexed token: its syntax kind plus the length of its source text.
///
/// No offset is stored; `lex` pushes tokens in source order, so a token's
/// position is recovered by summing the lengths of the tokens before it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Token {
    /// The syntax kind assigned to this token by `lex`.
    pub kind: SyntaxKind,
    /// Length of the token's text, computed from the lexer span (`span.end - span.start`).
    pub len: u32,
}
35
/// The reason a piece of input was reported as a lex error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexErrorKind {
    /// An integer literal contains `digit`, which is not valid in `radix`.
    /// `token` is the literal text with any type suffix removed.
    InvalidDigit { digit: char, radix: u32, token: String },
    /// Input that could not be tokenized. Also used for an unterminated block
    /// comment, in which case `content` is a short prefix of the comment.
    CouldNotLex { content: String },
    /// A Unicode bidirectional formatting character (U+202A..=U+202E or
    /// U+2066..=U+2069) appeared in the source.
    BidiOverride,
}
46
/// A lex error together with the source range it applies to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// Range in the source text that the error refers to.
    pub range: TextRange,
    /// What went wrong.
    pub kind: LexErrorKind,
}
55
56fn comment_block(lex: &mut logos::Lexer<LogosToken>) -> bool {
63 let mut last_asterisk = false;
64 for (index, c) in lex.remainder().char_indices() {
65 if c == '*' {
66 last_asterisk = true;
67 } else if c == '/' && last_asterisk {
68 lex.bump(index + 1);
69 return true;
70 } else if matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}') {
71 lex.bump(index);
74 return true;
75 } else {
76 last_asterisk = false;
77 }
78 }
79 let remaining = lex.remainder().len();
81 lex.bump(remaining);
82 true
83}
84
/// Raw token classes recognized by the Logos-generated lexer.
///
/// These are private to the lexer; `lex` maps each variant to a `SyntaxKind`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
enum LogosToken {
    /// A run of spaces, tabs or form feeds (no line breaks).
    #[regex(r"[ \t\f]+")]
    Whitespace,

    /// A single line break: `\n` or `\r\n`.
    #[regex(r"\r?\n")]
    Linebreak,

    /// `//` up to, but not including, the line break or a bidirectional
    /// formatting character.
    #[regex(r"//[^\r\n\u{202A}-\u{202E}\u{2066}-\u{2069}]*")]
    CommentLine,

    /// `/* ... */`; the body is consumed by the `comment_block` callback.
    #[token(r"/*", comment_block)]
    CommentBlock,

    /// `aleo1` followed by lowercase letters and digits.
    #[regex(r"aleo1[a-z0-9]*")]
    AddressLiteral,

    /// Integer literal with an optional type suffix.
    ///
    /// The digit part accepts any ASCII alphanumerics and underscores; `lex`
    /// runs `validate_integer_digits` on the text afterwards to report digits
    /// that are invalid for the radix.
    // NOTE(review): `priority = 3` on the prefixed forms presumably resolves the
    // overlap with the generic pattern below — confirm against Logos' priority rules.
    #[regex(r"0x[0-9A-Za-z_]+([ui](8|16|32|64|128)|field|group|scalar)?", priority = 3)]
    #[regex(r"0o[0-9A-Za-z_]+([ui](8|16|32|64|128)|field|group|scalar)?", priority = 3)]
    #[regex(r"0b[0-9A-Za-z_]+([ui](8|16|32|64|128)|field|group|scalar)?", priority = 3)]
    #[regex(r"[0-9][0-9A-Za-z_]*([ui](8|16|32|64|128)|field|group|scalar)?")]
    Integer,

    /// Double-quoted string; any character except `"` is allowed inside, so
    /// there is no escape syntax.
    #[regex(r#""[^"]*""#)]
    StaticString,

    /// An identifier wrapped in single quotes, e.g. `'foo'`.
    #[regex(r"'[a-zA-Z][a-zA-Z0-9_]*'")]
    IdentifierLiteral,

    /// `group::name`, `signature::name` or `Future::name`, lexed as one token.
    #[regex(r"group::[a-zA-Z][a-zA-Z0-9_]*")]
    #[regex(r"signature::[a-zA-Z][a-zA-Z0-9_]*")]
    #[regex(r"Future::[a-zA-Z][a-zA-Z0-9_]*")]
    PathSpecial,

    /// An identifier with a leading underscore, e.g. `_baz`.
    #[regex(r"_[a-zA-Z][a-zA-Z0-9_]*")]
    IdentIntrinsic,

    /// A plain identifier or keyword; `lex` resolves keywords via `ident_to_kind`.
    #[regex(r"[a-zA-Z][a-zA-Z0-9_]*")]
    Ident,

    // Three-character operators.
    #[token("**=")]
    PowAssign,
    #[token("&&=")]
    AndAssign,
    #[token("||=")]
    OrAssign,
    #[token("<<=")]
    ShlAssign,
    #[token(">>=")]
    ShrAssign,

    // Two-character operators.
    #[token("**")]
    Pow,
    #[token("&&")]
    And,
    #[token("||")]
    Or,
    #[token("<<")]
    Shl,
    #[token(">>")]
    Shr,
    #[token("==")]
    EqEq,
    #[token("!=")]
    NotEq,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("+=")]
    AddAssign,
    #[token("-=")]
    SubAssign,
    #[token("*=")]
    MulAssign,
    #[token("/=")]
    DivAssign,
    #[token("%=")]
    RemAssign,
    #[token("&=")]
    BitAndAssign,
    #[token("|=")]
    BitOrAssign,
    #[token("^=")]
    BitXorAssign,

    // Arrows, ranges and the path separator.
    #[token("->")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("..=")]
    DotDotEq,
    #[token("..")]
    DotDot,
    #[token("::")]
    ColonColon,

    // Single-character operators.
    #[token("=")]
    Eq,
    #[token("!")]
    Bang,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("&")]
    Amp,
    #[token("|")]
    Pipe,
    #[token("^")]
    Caret,

    // Delimiters and punctuation.
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token(";")]
    Semicolon,
    #[token(":")]
    Colon,
    #[token("?")]
    Question,
    #[token("_")]
    Underscore,
    #[token("@")]
    At,

    /// A single bidirectional formatting character (U+202A..=U+202E or
    /// U+2066..=U+2069); `lex` reports it as `LexErrorKind::BidiOverride`.
    #[regex(r"[\u{202A}-\u{202E}\u{2066}-\u{2069}]")]
    Bidi,
}
278
/// Maps the text of an identifier-shaped token to its `SyntaxKind`.
///
/// Returns the matching keyword kind when `s` is exactly one of the reserved
/// words below, and `IDENT` otherwise. Matching is case-sensitive.
fn ident_to_kind(s: &str) -> SyntaxKind {
    match s {
        "true" => KW_TRUE,
        "false" => KW_FALSE,
        "none" => KW_NONE,
        "address" => KW_ADDRESS,
        "bool" => KW_BOOL,
        "field" => KW_FIELD,
        "group" => KW_GROUP,
        "scalar" => KW_SCALAR,
        "signature" => KW_SIGNATURE,
        "string" => KW_STRING,
        "record" => KW_RECORD,
        "dyn" => KW_DYN,
        "identifier" => KW_IDENTIFIER,
        "i8" => KW_I8,
        "i16" => KW_I16,
        "i32" => KW_I32,
        "i64" => KW_I64,
        "i128" => KW_I128,
        "u8" => KW_U8,
        "u16" => KW_U16,
        "u32" => KW_U32,
        "u64" => KW_U64,
        "u128" => KW_U128,
        "if" => KW_IF,
        "else" => KW_ELSE,
        "for" => KW_FOR,
        "in" => KW_IN,
        "return" => KW_RETURN,
        "let" => KW_LET,
        "const" => KW_CONST,
        "constant" => KW_CONSTANT,
        // `final`/`Final` and `fn`/`Fn` differ only in case but map to distinct kinds.
        "final" => KW_FINAL,
        "Final" => KW_FINAL_UPPER,
        "view" => KW_VIEW,
        "fn" => KW_FN,
        "Fn" => KW_FN_UPPER,
        "struct" => KW_STRUCT,
        "constructor" => KW_CONSTRUCTOR,
        "interface" => KW_INTERFACE,
        "program" => KW_PROGRAM,
        "import" => KW_IMPORT,
        "mapping" => KW_MAPPING,
        "storage" => KW_STORAGE,
        "network" => KW_NETWORK,
        "aleo" => KW_ALEO,
        "script" => KW_SCRIPT,
        "block" => KW_BLOCK,
        "public" => KW_PUBLIC,
        "private" => KW_PRIVATE,
        "as" => KW_AS,
        "self" => KW_SELF,
        "assert" => KW_ASSERT,
        "assert_eq" => KW_ASSERT_EQ,
        "assert_neq" => KW_ASSERT_NEQ,
        // Anything else is an ordinary identifier.
        _ => IDENT,
    }
}
346
/// Removes a trailing integer type suffix (`u8`..`u128`, `i8`..`i128`) from `s`.
///
/// Returns the text before the suffix, or `None` when `s` does not end with
/// any of the integer suffixes.
fn strip_int_suffix(s: &str) -> Option<&str> {
    // Checked in this order; the first suffix that matches wins.
    const INT_SUFFIXES: [&str; 10] =
        ["u128", "i128", "u64", "i64", "u32", "i32", "u16", "i16", "u8", "i8"];

    INT_SUFFIXES.iter().find_map(|&suffix| s.strip_suffix(suffix))
}
358
359fn validate_integer_digits(text: &str, offset: usize, errors: &mut Vec<LexError>) {
362 let num_part = text
364 .strip_suffix("field")
365 .or_else(|| text.strip_suffix("group"))
366 .or_else(|| text.strip_suffix("scalar"))
367 .or_else(|| strip_int_suffix(text))
368 .unwrap_or(text);
369
370 let (digits, radix, _prefix_len): (&str, u32, usize) = if let Some(s) = num_part.strip_prefix("0x") {
372 (s, 16, 2)
373 } else if let Some(s) = num_part.strip_prefix("0X") {
374 (s, 16, 2)
375 } else if let Some(s) = num_part.strip_prefix("0o") {
376 (s, 8, 2)
377 } else if let Some(s) = num_part.strip_prefix("0O") {
378 (s, 8, 2)
379 } else if let Some(s) = num_part.strip_prefix("0b") {
380 (s, 2, 2)
381 } else if let Some(s) = num_part.strip_prefix("0B") {
382 (s, 2, 2)
383 } else {
384 (num_part, 10, 0)
386 };
387
388 for (_, c) in digits.char_indices() {
390 if c == '_' {
391 continue; }
393 if !c.is_digit(radix) {
394 let error_end = offset + num_part.len();
396 errors.push(LexError {
397 range: TextRange::new(TextSize::new(offset as u32), TextSize::new(error_end as u32)),
398 kind: LexErrorKind::InvalidDigit { digit: c, radix, token: num_part.to_string() },
399 });
400 return; }
402 }
403}
404
405pub fn lex(source: &str) -> (Vec<Token>, Vec<LexError>) {
410 let mut tokens = Vec::new();
411 let mut errors = Vec::new();
412 let mut lexer = LogosToken::lexer(source);
413
414 while let Some(result) = lexer.next() {
415 let span = lexer.span();
416 let len = (span.end - span.start) as u32;
417 let slice = lexer.slice();
418
419 let kind = match result {
420 Ok(token) => match token {
421 LogosToken::Whitespace => WHITESPACE,
423 LogosToken::Linebreak => LINEBREAK,
424 LogosToken::CommentLine => COMMENT_LINE,
425 LogosToken::CommentBlock => {
426 if !slice.ends_with("*/") {
428 let preview_len = slice.len().min(10);
429 let preview = &slice[..preview_len];
430 errors.push(LexError {
431 range: TextRange::new(
432 TextSize::new(span.start as u32),
433 TextSize::new((span.start + 2) as u32), ),
435 kind: LexErrorKind::CouldNotLex { content: preview.to_string() },
436 });
437 }
438 COMMENT_BLOCK
439 }
440
441 LogosToken::AddressLiteral => ADDRESS_LIT,
443 LogosToken::Integer => INTEGER,
444 LogosToken::StaticString => STRING,
445 LogosToken::IdentifierLiteral => IDENT_LIT,
446
447 LogosToken::Ident => ident_to_kind(slice),
449 LogosToken::IdentIntrinsic => IDENT,
450 LogosToken::PathSpecial => IDENT, LogosToken::PowAssign => STAR2_EQ,
454 LogosToken::AndAssign => AMP2_EQ,
455 LogosToken::OrAssign => PIPE2_EQ,
456 LogosToken::ShlAssign => SHL_EQ,
457 LogosToken::ShrAssign => SHR_EQ,
458 LogosToken::Pow => STAR2,
459 LogosToken::And => AMP2,
460 LogosToken::Or => PIPE2,
461 LogosToken::Shl => SHL,
462 LogosToken::Shr => SHR,
463 LogosToken::EqEq => EQ2,
464 LogosToken::NotEq => BANG_EQ,
465 LogosToken::LtEq => LT_EQ,
466 LogosToken::GtEq => GT_EQ,
467 LogosToken::AddAssign => PLUS_EQ,
468 LogosToken::SubAssign => MINUS_EQ,
469 LogosToken::MulAssign => STAR_EQ,
470 LogosToken::DivAssign => SLASH_EQ,
471 LogosToken::RemAssign => PERCENT_EQ,
472 LogosToken::BitAndAssign => AMP_EQ,
473 LogosToken::BitOrAssign => PIPE_EQ,
474 LogosToken::BitXorAssign => CARET_EQ,
475 LogosToken::Arrow => ARROW,
476 LogosToken::FatArrow => FAT_ARROW,
477 LogosToken::DotDotEq => DOT_DOT_EQ,
478 LogosToken::DotDot => DOT_DOT,
479 LogosToken::ColonColon => COLON_COLON,
480
481 LogosToken::Eq => EQ,
483 LogosToken::Bang => BANG,
484 LogosToken::Lt => LT,
485 LogosToken::Gt => GT,
486 LogosToken::Plus => PLUS,
487 LogosToken::Minus => MINUS,
488 LogosToken::Star => STAR,
489 LogosToken::Slash => SLASH,
490 LogosToken::Percent => PERCENT,
491 LogosToken::Amp => AMP,
492 LogosToken::Pipe => PIPE,
493 LogosToken::Caret => CARET,
494
495 LogosToken::LParen => L_PAREN,
497 LogosToken::RParen => R_PAREN,
498 LogosToken::LBracket => L_BRACKET,
499 LogosToken::RBracket => R_BRACKET,
500 LogosToken::LBrace => L_BRACE,
501 LogosToken::RBrace => R_BRACE,
502 LogosToken::Comma => COMMA,
503 LogosToken::Dot => DOT,
504 LogosToken::Semicolon => SEMICOLON,
505 LogosToken::Colon => COLON,
506 LogosToken::Question => QUESTION,
507 LogosToken::Underscore => UNDERSCORE,
508 LogosToken::At => AT,
509
510 LogosToken::Bidi => {
512 errors.push(LexError {
513 range: TextRange::new(TextSize::new(span.start as u32), TextSize::new(span.end as u32)),
514 kind: LexErrorKind::BidiOverride,
515 });
516 ERROR
517 }
518 },
519 Err(()) => {
520 errors.push(LexError {
521 range: TextRange::new(TextSize::new(span.start as u32), TextSize::new(span.end as u32)),
522 kind: LexErrorKind::CouldNotLex { content: slice.to_string() },
523 });
524 ERROR
525 }
526 };
527
528 if kind == INTEGER {
530 validate_integer_digits(slice, span.start, &mut errors);
531 }
532
533 tokens.push(Token { kind, len });
534 }
535
536 tokens.push(Token { kind: EOF, len: 0 });
538
539 (tokens, errors)
540}
541
// Snapshot tests for the token stream plus targeted checks of the reported errors.
#[cfg(test)]
mod tests {
    use super::*;
    use expect_test::{Expect, expect};

    /// Lexes `input` and compares one `KIND "text"` line per token against
    /// `expect`. Token text is recovered by walking the input with the token
    /// lengths; lex errors are ignored.
    fn check_lex(input: &str, expect: Expect) {
        let (tokens, _errors) = lex(input);
        let mut output = String::new();
        let mut offset = 0usize;
        for token in &tokens {
            let text = &input[offset..offset + token.len as usize];
            output.push_str(&format!("{:?} {:?}\n", token.kind, text));
            offset += token.len as usize;
        }
        expect.assert_eq(&output);
    }

    /// Lexes `input` and compares the errors, formatted as `start..end:kind`
    /// and joined by newlines, against `expect`. Tokens are ignored.
    fn check_lex_errors(input: &str, expect: Expect) {
        let (_tokens, errors) = lex(input);
        let output = errors
            .iter()
            .map(|e| format!("{}..{}:{:?}", u32::from(e.range.start()), u32::from(e.range.end()), e.kind))
            .collect::<Vec<_>>()
            .join("\n");
        expect.assert_eq(&output);
    }

    #[test]
    fn lex_empty() {
        check_lex("", expect![[r#"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_whitespace() {
        check_lex(" \t ", expect![[r#"
            WHITESPACE " \t "
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_linebreaks() {
        check_lex("\n\r\n\n", expect![[r#"
            LINEBREAK "\n"
            LINEBREAK "\r\n"
            LINEBREAK "\n"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_mixed_whitespace() {
        check_lex(" \n \t\n", expect![[r#"
            WHITESPACE " "
            LINEBREAK "\n"
            WHITESPACE " \t"
            LINEBREAK "\n"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_line_comments() {
        check_lex("// hello\n// world", expect![[r#"
            COMMENT_LINE "// hello"
            LINEBREAK "\n"
            COMMENT_LINE "// world"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_block_comments() {
        check_lex("/* hello */ /* multi\nline */", expect![[r#"
            COMMENT_BLOCK "/* hello */"
            WHITESPACE " "
            COMMENT_BLOCK "/* multi\nline */"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_identifiers() {
        // `_baz` goes through the underscore-prefixed pattern but is still an IDENT.
        check_lex("foo Bar _baz x123", expect![[r#"
            IDENT "foo"
            WHITESPACE " "
            IDENT "Bar"
            WHITESPACE " "
            IDENT "_baz"
            WHITESPACE " "
            IDENT "x123"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_keywords() {
        check_lex("let fn if return true false", expect![[r#"
            KW_LET "let"
            WHITESPACE " "
            KW_FN "fn"
            WHITESPACE " "
            KW_IF "if"
            WHITESPACE " "
            KW_RETURN "return"
            WHITESPACE " "
            KW_TRUE "true"
            WHITESPACE " "
            KW_FALSE "false"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_type_keywords() {
        check_lex("u8 u16 u32 u64 u128 i8 i16 i32 i64 i128", expect![[r#"
            KW_U8 "u8"
            WHITESPACE " "
            KW_U16 "u16"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            KW_U64 "u64"
            WHITESPACE " "
            KW_U128 "u128"
            WHITESPACE " "
            KW_I8 "i8"
            WHITESPACE " "
            KW_I16 "i16"
            WHITESPACE " "
            KW_I32 "i32"
            WHITESPACE " "
            KW_I64 "i64"
            WHITESPACE " "
            KW_I128 "i128"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_more_type_keywords() {
        check_lex("bool field group scalar address signature string record", expect![[r#"
            KW_BOOL "bool"
            WHITESPACE " "
            KW_FIELD "field"
            WHITESPACE " "
            KW_GROUP "group"
            WHITESPACE " "
            KW_SCALAR "scalar"
            WHITESPACE " "
            KW_ADDRESS "address"
            WHITESPACE " "
            KW_SIGNATURE "signature"
            WHITESPACE " "
            KW_STRING "string"
            WHITESPACE " "
            KW_RECORD "record"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_identifier_literal() {
        check_lex("'foo' 'bar_baz' 'x'", expect![[r#"
            IDENT_LIT "'foo'"
            WHITESPACE " "
            IDENT_LIT "'bar_baz'"
            WHITESPACE " "
            IDENT_LIT "'x'"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_identifier_keyword() {
        check_lex("identifier", expect![[r#"
            KW_IDENTIFIER "identifier"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_integers() {
        check_lex("123 0xFF 0b101 0o77", expect![[r#"
            INTEGER "123"
            WHITESPACE " "
            INTEGER "0xFF"
            WHITESPACE " "
            INTEGER "0b101"
            WHITESPACE " "
            INTEGER "0o77"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_integers_with_underscores() {
        check_lex("1_000_000 0xFF_FF", expect![[r#"
            INTEGER "1_000_000"
            WHITESPACE " "
            INTEGER "0xFF_FF"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_address_literal() {
        check_lex("aleo1abc123", expect![[r#"
            ADDRESS_LIT "aleo1abc123"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_strings() {
        check_lex(r#""hello" "world""#, expect![[r#"
            STRING "\"hello\""
            WHITESPACE " "
            STRING "\"world\""
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_punctuation() {
        check_lex("( ) [ ] { } , . ; : :: ? -> => _ @", expect![[r#"
            L_PAREN "("
            WHITESPACE " "
            R_PAREN ")"
            WHITESPACE " "
            L_BRACKET "["
            WHITESPACE " "
            R_BRACKET "]"
            WHITESPACE " "
            L_BRACE "{"
            WHITESPACE " "
            R_BRACE "}"
            WHITESPACE " "
            COMMA ","
            WHITESPACE " "
            DOT "."
            WHITESPACE " "
            SEMICOLON ";"
            WHITESPACE " "
            COLON ":"
            WHITESPACE " "
            COLON_COLON "::"
            WHITESPACE " "
            QUESTION "?"
            WHITESPACE " "
            ARROW "->"
            WHITESPACE " "
            FAT_ARROW "=>"
            WHITESPACE " "
            UNDERSCORE "_"
            WHITESPACE " "
            AT "@"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_arithmetic_operators() {
        check_lex("+ - * / % **", expect![[r#"
            PLUS "+"
            WHITESPACE " "
            MINUS "-"
            WHITESPACE " "
            STAR "*"
            WHITESPACE " "
            SLASH "/"
            WHITESPACE " "
            PERCENT "%"
            WHITESPACE " "
            STAR2 "**"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_comparison_operators() {
        check_lex("== != < <= > >=", expect![[r#"
            EQ2 "=="
            WHITESPACE " "
            BANG_EQ "!="
            WHITESPACE " "
            LT "<"
            WHITESPACE " "
            LT_EQ "<="
            WHITESPACE " "
            GT ">"
            WHITESPACE " "
            GT_EQ ">="
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_logical_operators() {
        check_lex("&& || !", expect![[r#"
            AMP2 "&&"
            WHITESPACE " "
            PIPE2 "||"
            WHITESPACE " "
            BANG "!"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_bitwise_operators() {
        check_lex("& | ^ << >>", expect![[r#"
            AMP "&"
            WHITESPACE " "
            PIPE "|"
            WHITESPACE " "
            CARET "^"
            WHITESPACE " "
            SHL "<<"
            WHITESPACE " "
            SHR ">>"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_assignment_operators() {
        check_lex("= += -= *= /= %= **= &&= ||=", expect![[r#"
            EQ "="
            WHITESPACE " "
            PLUS_EQ "+="
            WHITESPACE " "
            MINUS_EQ "-="
            WHITESPACE " "
            STAR_EQ "*="
            WHITESPACE " "
            SLASH_EQ "/="
            WHITESPACE " "
            PERCENT_EQ "%="
            WHITESPACE " "
            STAR2_EQ "**="
            WHITESPACE " "
            AMP2_EQ "&&="
            WHITESPACE " "
            PIPE2_EQ "||="
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_more_assignment_operators() {
        check_lex("&= |= ^= <<= >>=", expect![[r#"
            AMP_EQ "&="
            WHITESPACE " "
            PIPE_EQ "|="
            WHITESPACE " "
            CARET_EQ "^="
            WHITESPACE " "
            SHL_EQ "<<="
            WHITESPACE " "
            SHR_EQ ">>="
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_dot_dot() {
        // The range operator must not be swallowed by the integer on either side.
        check_lex("0..10", expect![[r#"
            INTEGER "0"
            DOT_DOT ".."
            INTEGER "10"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_simple_expression() {
        check_lex("x + y * 2", expect![[r#"
            IDENT "x"
            WHITESPACE " "
            PLUS "+"
            WHITESPACE " "
            IDENT "y"
            WHITESPACE " "
            STAR "*"
            WHITESPACE " "
            INTEGER "2"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_function_call() {
        check_lex("foo(a, b)", expect![[r#"
            IDENT "foo"
            L_PAREN "("
            IDENT "a"
            COMMA ","
            WHITESPACE " "
            IDENT "b"
            R_PAREN ")"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_function_definition() {
        check_lex("fn add(x: u32) -> u32 {", expect![[r#"
            KW_FN "fn"
            WHITESPACE " "
            IDENT "add"
            L_PAREN "("
            IDENT "x"
            COLON ":"
            WHITESPACE " "
            KW_U32 "u32"
            R_PAREN ")"
            WHITESPACE " "
            ARROW "->"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            L_BRACE "{"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_let_statement() {
        check_lex("let x: u32 = 42;", expect![[r#"
            KW_LET "let"
            WHITESPACE " "
            IDENT "x"
            COLON ":"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            EQ "="
            WHITESPACE " "
            INTEGER "42"
            SEMICOLON ";"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_typed_integers() {
        // The type suffix is part of the INTEGER token, not a separate keyword.
        check_lex("1000u32 42i64 0u8 255u128", expect![[r#"
            INTEGER "1000u32"
            WHITESPACE " "
            INTEGER "42i64"
            WHITESPACE " "
            INTEGER "0u8"
            WHITESPACE " "
            INTEGER "255u128"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_typed_integers_field() {
        check_lex("123field 456group 789scalar", expect![[r#"
            INTEGER "123field"
            WHITESPACE " "
            INTEGER "456group"
            WHITESPACE " "
            INTEGER "789scalar"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_special_paths() {
        // `group::`, `signature::` and `Future::` paths come out as one IDENT token.
        check_lex("group::GEN signature::verify Future::await", expect![[r#"
            IDENT "group::GEN"
            WHITESPACE " "
            IDENT "signature::verify"
            WHITESPACE " "
            IDENT "Future::await"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_typed_integer_range() {
        check_lex("0u8..STOP", expect![[r#"
            INTEGER "0u8"
            DOT_DOT ".."
            IDENT "STOP"
            EOF ""
        "#]]);
    }

    #[test]
    fn lex_error_unknown_char() {
        check_lex_errors("hello $ world", expect![[r#"6..7:CouldNotLex { content: "$" }"#]]);
    }

    #[test]
    fn lex_invalid_hex_digit() {
        let (tokens, errors) = lex("0xGAu32");
        // Still a single INTEGER token, followed by EOF.
        assert_eq!(tokens.len(), 2);
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::InvalidDigit { digit: 'G', radix: 16, .. }));
    }

    #[test]
    fn lex_invalid_octal_digit() {
        let (_, errors) = lex("0o9u32");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::InvalidDigit { digit: '9', radix: 8, .. }));
    }

    #[test]
    fn lex_invalid_binary_digit() {
        let (_, errors) = lex("0b2u32");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::InvalidDigit { digit: '2', radix: 2, .. }));
    }

    #[test]
    fn lex_valid_hex_is_ok() {
        let (_, errors) = lex("0xDEADBEEFu64");
        assert!(errors.is_empty());
    }

    #[test]
    fn lex_invalid_hex_lowercase() {
        let (_, errors) = lex("0xghu32");
        // Only the first invalid digit is reported.
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::InvalidDigit { digit: 'g', radix: 16, .. }));
    }

    #[test]
    fn lex_bidi_override_error() {
        let (_, errors) = lex("let x\u{202E} = 1;");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::BidiOverride));
    }

    #[test]
    fn lex_unclosed_block_comment() {
        let (tokens, errors) = lex("/* unclosed");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::CouldNotLex { .. }));
        // The unterminated comment is still emitted as a comment token.
        assert!(tokens.iter().any(|t| t.kind == COMMENT_BLOCK));
    }

    #[test]
    fn lex_nested_comment_not_supported() {
        let (tokens, errors) = lex("/* outer /* inner */");
        // Block comments do not nest: the first `*/` closes the comment, so the
        // whole input is one terminated comment and nothing is reported.
        assert!(errors.is_empty());
        assert!(tokens.iter().any(|t| t.kind == COMMENT_BLOCK));
    }

    #[test]
    fn lex_closed_comment_ok() {
        let (_, errors) = lex("/* closed */");
        assert!(errors.is_empty());
    }
}