1use crate::{SyntaxKind, SyntaxKind::*};
24use logos::Logos;
25use rowan::{TextRange, TextSize};
26
/// A single lexed token: its syntax kind plus the length of its source text.
///
/// A token stores neither its text nor its offset; a consumer recovers both by
/// walking the token list and accumulating `len` from the start of the source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Token {
    /// The syntax kind assigned to this token.
    pub kind: SyntaxKind,
    /// Length of the token's text, computed in `lex` as `span.end - span.start`
    /// of the underlying lexer span.
    pub len: u32,
}
35
/// The category of a lexing error, together with the data describing it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexErrorKind {
    /// A numeric literal contains a character that is not a digit in its radix.
    ///
    /// `digit` is the first offending character, `radix` is the base selected
    /// from the literal's prefix (10 when there is none), and `token` is the
    /// literal text with any type suffix removed.
    InvalidDigit { digit: char, radix: u32, token: String },
    /// A piece of source text could not be lexed; `content` holds that text
    /// (for an unterminated block comment, only a short preview of it).
    CouldNotLex { content: String },
    /// A Unicode bidirectional control character (U+202A..=U+202E or
    /// U+2066..=U+2069) was found in the source.
    BidiOverride,
}
46
/// A lexing error: where it occurred and what went wrong.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexError {
    /// The source range the error is attached to.
    pub range: TextRange,
    /// What kind of error this is.
    pub kind: LexErrorKind,
}
55
56fn comment_block(lex: &mut logos::Lexer<LogosToken>) -> bool {
63 let mut last_asterisk = false;
64 for (index, c) in lex.remainder().char_indices() {
65 if c == '*' {
66 last_asterisk = true;
67 } else if c == '/' && last_asterisk {
68 lex.bump(index + 1);
69 return true;
70 } else if matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}') {
71 lex.bump(index);
74 return true;
75 } else {
76 last_asterisk = false;
77 }
78 }
79 let remaining = lex.remainder().len();
81 lex.bump(remaining);
82 true
83}
84
/// Raw token kinds recognised by the `logos`-generated lexer.
///
/// This enum is private to the module; `lex` maps every variant onto a
/// `SyntaxKind`. Keywords are not distinguished here: they lex as `Ident` and
/// are classified afterwards by `ident_to_kind`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Logos)]
enum LogosToken {
    /// A run of spaces, tabs, or form feeds (no line breaks).
    #[regex(r"[ \t\f]+")]
    Whitespace,

    /// A single line break: `\n` or `\r\n`.
    #[regex(r"\r?\n")]
    Linebreak,

    /// A `//` comment. It stops before a line break and also before any
    /// bidirectional control character, which is then lexed as `Bidi`.
    #[regex(r"//[^\r\n\u{202A}-\u{202E}\u{2066}-\u{2069}]*")]
    CommentLine,

    /// A `/* ... */` comment; the extent after `/*` is determined by the
    /// `comment_block` callback.
    #[token(r"/*", comment_block)]
    CommentBlock,

    /// `aleo1` followed by any number of lowercase ASCII letters or digits.
    #[regex(r"aleo1[a-z0-9]*")]
    AddressLiteral,

    /// An integer literal, optionally prefixed with `0x`/`0o`/`0b` and
    /// optionally followed by a type suffix.
    ///
    /// The body accepts any ASCII alphanumeric character or `_`, so a literal
    /// containing digits that are invalid for its radix still lexes as one
    /// `Integer` token; `lex` then runs `validate_integer_digits` on it.
    // NOTE(review): `priority = 3` on the prefixed patterns presumably settles
    // the overlap with the general pattern below -- confirm against the logos
    // priority rules.
    #[regex(r"0x[0-9A-Za-z_]+([ui](8|16|32|64|128)|field|group|scalar)?", priority = 3)]
    #[regex(r"0o[0-9A-Za-z_]+([ui](8|16|32|64|128)|field|group|scalar)?", priority = 3)]
    #[regex(r"0b[0-9A-Za-z_]+([ui](8|16|32|64|128)|field|group|scalar)?", priority = 3)]
    #[regex(r"[0-9][0-9A-Za-z_]*([ui](8|16|32|64|128)|field|group|scalar)?")]
    Integer,

    /// A double-quoted string containing no `"` characters (no escape syntax).
    #[regex(r#""[^"]*""#)]
    StaticString,

    /// An identifier wrapped in single quotes, e.g. `'foo'`.
    #[regex(r"'[a-zA-Z][a-zA-Z0-9_]*'")]
    IdentifierLiteral,

    /// `group::`, `signature::`, or `Future::` immediately followed by an
    /// identifier, lexed as a single token.
    #[regex(r"group::[a-zA-Z][a-zA-Z0-9_]*")]
    #[regex(r"signature::[a-zA-Z][a-zA-Z0-9_]*")]
    #[regex(r"Future::[a-zA-Z][a-zA-Z0-9_]*")]
    PathSpecial,

    /// An identifier with a leading underscore, e.g. `_baz`.
    #[regex(r"_[a-zA-Z][a-zA-Z0-9_]*")]
    IdentIntrinsic,

    /// A plain identifier or keyword: an ASCII letter followed by letters,
    /// digits, or underscores.
    #[regex(r"[a-zA-Z][a-zA-Z0-9_]*")]
    Ident,

    // Three-character compound assignment operators.
    #[token("**=")]
    PowAssign,
    #[token("&&=")]
    AndAssign,
    #[token("||=")]
    OrAssign,
    #[token("<<=")]
    ShlAssign,
    #[token(">>=")]
    ShrAssign,

    // Two-character operators.
    #[token("**")]
    Pow,
    #[token("&&")]
    And,
    #[token("||")]
    Or,
    #[token("<<")]
    Shl,
    #[token(">>")]
    Shr,
    #[token("==")]
    EqEq,
    #[token("!=")]
    NotEq,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("+=")]
    AddAssign,
    #[token("-=")]
    SubAssign,
    #[token("*=")]
    MulAssign,
    #[token("/=")]
    DivAssign,
    #[token("%=")]
    RemAssign,
    #[token("&=")]
    BitAndAssign,
    #[token("|=")]
    BitOrAssign,
    #[token("^=")]
    BitXorAssign,

    // Arrows, ranges, and the path separator.
    #[token("->")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("..=")]
    DotDotEq,
    #[token("..")]
    DotDot,
    #[token("::")]
    ColonColon,

    // Single-character operators.
    #[token("=")]
    Eq,
    #[token("!")]
    Bang,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("&")]
    Amp,
    #[token("|")]
    Pipe,
    #[token("^")]
    Caret,

    // Delimiters and punctuation.
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token(";")]
    Semicolon,
    #[token(":")]
    Colon,
    #[token("?")]
    Question,
    #[token("_")]
    Underscore,
    #[token("@")]
    At,

    /// A single bidirectional control character (U+202A..=U+202E or
    /// U+2066..=U+2069); `lex` reports it as a `BidiOverride` error.
    #[regex(r"[\u{202A}-\u{202E}\u{2066}-\u{2069}]")]
    Bidi,
}
278
279fn ident_to_kind(s: &str) -> SyntaxKind {
281 match s {
282 "true" => KW_TRUE,
284 "false" => KW_FALSE,
285 "none" => KW_NONE,
286 "address" => KW_ADDRESS,
288 "bool" => KW_BOOL,
289 "field" => KW_FIELD,
290 "group" => KW_GROUP,
291 "scalar" => KW_SCALAR,
292 "signature" => KW_SIGNATURE,
293 "string" => KW_STRING,
294 "record" => KW_RECORD,
295 "dyn" => KW_DYN,
296 "identifier" => KW_IDENTIFIER,
297 "i8" => KW_I8,
298 "i16" => KW_I16,
299 "i32" => KW_I32,
300 "i64" => KW_I64,
301 "i128" => KW_I128,
302 "u8" => KW_U8,
303 "u16" => KW_U16,
304 "u32" => KW_U32,
305 "u64" => KW_U64,
306 "u128" => KW_U128,
307 "if" => KW_IF,
309 "else" => KW_ELSE,
310 "for" => KW_FOR,
311 "in" => KW_IN,
312 "return" => KW_RETURN,
313 "let" => KW_LET,
315 "const" => KW_CONST,
316 "constant" => KW_CONSTANT,
317 "final" => KW_FINAL,
318 "Final" => KW_FINAL_UPPER,
319 "fn" => KW_FN,
320 "Fn" => KW_FN_UPPER,
321 "struct" => KW_STRUCT,
322 "constructor" => KW_CONSTRUCTOR,
323 "interface" => KW_INTERFACE,
324 "program" => KW_PROGRAM,
326 "import" => KW_IMPORT,
327 "mapping" => KW_MAPPING,
328 "storage" => KW_STORAGE,
329 "network" => KW_NETWORK,
330 "aleo" => KW_ALEO,
331 "script" => KW_SCRIPT,
332 "block" => KW_BLOCK,
333 "public" => KW_PUBLIC,
335 "private" => KW_PRIVATE,
336 "as" => KW_AS,
337 "self" => KW_SELF,
338 "assert" => KW_ASSERT,
339 "assert_eq" => KW_ASSERT_EQ,
340 "assert_neq" => KW_ASSERT_NEQ,
341 _ => IDENT,
343 }
344}
345
/// Removes a trailing integer type suffix (`u8`..`u128`, `i8`..`i128`) from `s`.
///
/// Returns the text before the suffix, or `None` when `s` does not end with
/// any of the ten integer suffixes.
fn strip_int_suffix(s: &str) -> Option<&str> {
    const INT_SUFFIXES: [&str; 10] =
        ["u128", "i128", "u64", "i64", "u32", "i32", "u16", "i16", "u8", "i8"];

    INT_SUFFIXES.iter().find_map(|suffix| s.strip_suffix(*suffix))
}
357
358fn validate_integer_digits(text: &str, offset: usize, errors: &mut Vec<LexError>) {
361 let num_part = text
363 .strip_suffix("field")
364 .or_else(|| text.strip_suffix("group"))
365 .or_else(|| text.strip_suffix("scalar"))
366 .or_else(|| strip_int_suffix(text))
367 .unwrap_or(text);
368
369 let (digits, radix, _prefix_len): (&str, u32, usize) = if let Some(s) = num_part.strip_prefix("0x") {
371 (s, 16, 2)
372 } else if let Some(s) = num_part.strip_prefix("0X") {
373 (s, 16, 2)
374 } else if let Some(s) = num_part.strip_prefix("0o") {
375 (s, 8, 2)
376 } else if let Some(s) = num_part.strip_prefix("0O") {
377 (s, 8, 2)
378 } else if let Some(s) = num_part.strip_prefix("0b") {
379 (s, 2, 2)
380 } else if let Some(s) = num_part.strip_prefix("0B") {
381 (s, 2, 2)
382 } else {
383 (num_part, 10, 0)
385 };
386
387 for (_, c) in digits.char_indices() {
389 if c == '_' {
390 continue; }
392 if !c.is_digit(radix) {
393 let error_end = offset + num_part.len();
395 errors.push(LexError {
396 range: TextRange::new(TextSize::new(offset as u32), TextSize::new(error_end as u32)),
397 kind: LexErrorKind::InvalidDigit { digit: c, radix, token: num_part.to_string() },
398 });
399 return; }
401 }
402}
403
404pub fn lex(source: &str) -> (Vec<Token>, Vec<LexError>) {
409 let mut tokens = Vec::new();
410 let mut errors = Vec::new();
411 let mut lexer = LogosToken::lexer(source);
412
413 while let Some(result) = lexer.next() {
414 let span = lexer.span();
415 let len = (span.end - span.start) as u32;
416 let slice = lexer.slice();
417
418 let kind = match result {
419 Ok(token) => match token {
420 LogosToken::Whitespace => WHITESPACE,
422 LogosToken::Linebreak => LINEBREAK,
423 LogosToken::CommentLine => COMMENT_LINE,
424 LogosToken::CommentBlock => {
425 if !slice.ends_with("*/") {
427 let preview_len = slice.len().min(10);
428 let preview = &slice[..preview_len];
429 errors.push(LexError {
430 range: TextRange::new(
431 TextSize::new(span.start as u32),
432 TextSize::new((span.start + 2) as u32), ),
434 kind: LexErrorKind::CouldNotLex { content: preview.to_string() },
435 });
436 }
437 COMMENT_BLOCK
438 }
439
440 LogosToken::AddressLiteral => ADDRESS_LIT,
442 LogosToken::Integer => INTEGER,
443 LogosToken::StaticString => STRING,
444 LogosToken::IdentifierLiteral => IDENT_LIT,
445
446 LogosToken::Ident => ident_to_kind(slice),
448 LogosToken::IdentIntrinsic => IDENT,
449 LogosToken::PathSpecial => IDENT, LogosToken::PowAssign => STAR2_EQ,
453 LogosToken::AndAssign => AMP2_EQ,
454 LogosToken::OrAssign => PIPE2_EQ,
455 LogosToken::ShlAssign => SHL_EQ,
456 LogosToken::ShrAssign => SHR_EQ,
457 LogosToken::Pow => STAR2,
458 LogosToken::And => AMP2,
459 LogosToken::Or => PIPE2,
460 LogosToken::Shl => SHL,
461 LogosToken::Shr => SHR,
462 LogosToken::EqEq => EQ2,
463 LogosToken::NotEq => BANG_EQ,
464 LogosToken::LtEq => LT_EQ,
465 LogosToken::GtEq => GT_EQ,
466 LogosToken::AddAssign => PLUS_EQ,
467 LogosToken::SubAssign => MINUS_EQ,
468 LogosToken::MulAssign => STAR_EQ,
469 LogosToken::DivAssign => SLASH_EQ,
470 LogosToken::RemAssign => PERCENT_EQ,
471 LogosToken::BitAndAssign => AMP_EQ,
472 LogosToken::BitOrAssign => PIPE_EQ,
473 LogosToken::BitXorAssign => CARET_EQ,
474 LogosToken::Arrow => ARROW,
475 LogosToken::FatArrow => FAT_ARROW,
476 LogosToken::DotDotEq => DOT_DOT_EQ,
477 LogosToken::DotDot => DOT_DOT,
478 LogosToken::ColonColon => COLON_COLON,
479
480 LogosToken::Eq => EQ,
482 LogosToken::Bang => BANG,
483 LogosToken::Lt => LT,
484 LogosToken::Gt => GT,
485 LogosToken::Plus => PLUS,
486 LogosToken::Minus => MINUS,
487 LogosToken::Star => STAR,
488 LogosToken::Slash => SLASH,
489 LogosToken::Percent => PERCENT,
490 LogosToken::Amp => AMP,
491 LogosToken::Pipe => PIPE,
492 LogosToken::Caret => CARET,
493
494 LogosToken::LParen => L_PAREN,
496 LogosToken::RParen => R_PAREN,
497 LogosToken::LBracket => L_BRACKET,
498 LogosToken::RBracket => R_BRACKET,
499 LogosToken::LBrace => L_BRACE,
500 LogosToken::RBrace => R_BRACE,
501 LogosToken::Comma => COMMA,
502 LogosToken::Dot => DOT,
503 LogosToken::Semicolon => SEMICOLON,
504 LogosToken::Colon => COLON,
505 LogosToken::Question => QUESTION,
506 LogosToken::Underscore => UNDERSCORE,
507 LogosToken::At => AT,
508
509 LogosToken::Bidi => {
511 errors.push(LexError {
512 range: TextRange::new(TextSize::new(span.start as u32), TextSize::new(span.end as u32)),
513 kind: LexErrorKind::BidiOverride,
514 });
515 ERROR
516 }
517 },
518 Err(()) => {
519 errors.push(LexError {
520 range: TextRange::new(TextSize::new(span.start as u32), TextSize::new(span.end as u32)),
521 kind: LexErrorKind::CouldNotLex { content: slice.to_string() },
522 });
523 ERROR
524 }
525 };
526
527 if kind == INTEGER {
529 validate_integer_digits(slice, span.start, &mut errors);
530 }
531
532 tokens.push(Token { kind, len });
533 }
534
535 tokens.push(Token { kind: EOF, len: 0 });
537
538 (tokens, errors)
539}
540
// Snapshot and assertion tests for `lex`.
#[cfg(test)]
mod tests {
    use super::*;
    use expect_test::{Expect, expect};

    /// Lexes `input` and compares a dump of the tokens against `expect`.
    ///
    /// The dump has one line per token, formatted as `KIND "text"`, where the
    /// text is recovered by slicing `input` with the accumulated token lengths.
    /// Lexing errors are ignored here.
    fn check_lex(input: &str, expect: Expect) {
        let (tokens, _errors) = lex(input);
        let mut output = String::new();
        let mut offset = 0usize;
        for token in &tokens {
            let text = &input[offset..offset + token.len as usize];
            output.push_str(&format!("{:?} {:?}\n", token.kind, text));
            offset += token.len as usize;
        }
        expect.assert_eq(&output);
    }

    /// Lexes `input` and compares its errors against `expect`.
    ///
    /// Each error is rendered as `start..end:kind`; errors are joined with
    /// newlines. The tokens are ignored here.
    fn check_lex_errors(input: &str, expect: Expect) {
        let (_tokens, errors) = lex(input);
        let output = errors
            .iter()
            .map(|e| format!("{}..{}:{:?}", u32::from(e.range.start()), u32::from(e.range.end()), e.kind))
            .collect::<Vec<_>>()
            .join("\n");
        expect.assert_eq(&output);
    }

    // Empty input still produces the trailing EOF token.
    #[test]
    fn lex_empty() {
        check_lex("", expect![[r#"
            EOF ""
        "#]]);
    }

    // Spaces and tabs collapse into a single WHITESPACE token.
    #[test]
    fn lex_whitespace() {
        check_lex(" \t ", expect![[r#"
            WHITESPACE " \t "
            EOF ""
        "#]]);
    }

    // Each `\n` or `\r\n` is its own LINEBREAK token.
    #[test]
    fn lex_linebreaks() {
        check_lex("\n\r\n\n", expect![[r#"
            LINEBREAK "\n"
            LINEBREAK "\r\n"
            LINEBREAK "\n"
            EOF ""
        "#]]);
    }

    // Line breaks split runs of horizontal whitespace.
    #[test]
    fn lex_mixed_whitespace() {
        check_lex(" \n \t\n", expect![[r#"
            WHITESPACE " "
            LINEBREAK "\n"
            WHITESPACE " \t"
            LINEBREAK "\n"
            EOF ""
        "#]]);
    }

    // A line comment ends before the line break, which is a separate token.
    #[test]
    fn lex_line_comments() {
        check_lex("// hello\n// world", expect![[r#"
            COMMENT_LINE "// hello"
            LINEBREAK "\n"
            COMMENT_LINE "// world"
            EOF ""
        "#]]);
    }

    // Block comments may span multiple lines.
    #[test]
    fn lex_block_comments() {
        check_lex("/* hello */ /* multi\nline */", expect![[r#"
            COMMENT_BLOCK "/* hello */"
            WHITESPACE " "
            COMMENT_BLOCK "/* multi\nline */"
            EOF ""
        "#]]);
    }

    // Identifiers with a leading underscore also map to IDENT.
    #[test]
    fn lex_identifiers() {
        check_lex("foo Bar _baz x123", expect![[r#"
            IDENT "foo"
            WHITESPACE " "
            IDENT "Bar"
            WHITESPACE " "
            IDENT "_baz"
            WHITESPACE " "
            IDENT "x123"
            EOF ""
        "#]]);
    }

    // Keywords are recognised from identifier text.
    #[test]
    fn lex_keywords() {
        check_lex("let fn if return true false", expect![[r#"
            KW_LET "let"
            WHITESPACE " "
            KW_FN "fn"
            WHITESPACE " "
            KW_IF "if"
            WHITESPACE " "
            KW_RETURN "return"
            WHITESPACE " "
            KW_TRUE "true"
            WHITESPACE " "
            KW_FALSE "false"
            EOF ""
        "#]]);
    }

    // Integer type names are keywords when they stand alone.
    #[test]
    fn lex_type_keywords() {
        check_lex("u8 u16 u32 u64 u128 i8 i16 i32 i64 i128", expect![[r#"
            KW_U8 "u8"
            WHITESPACE " "
            KW_U16 "u16"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            KW_U64 "u64"
            WHITESPACE " "
            KW_U128 "u128"
            WHITESPACE " "
            KW_I8 "i8"
            WHITESPACE " "
            KW_I16 "i16"
            WHITESPACE " "
            KW_I32 "i32"
            WHITESPACE " "
            KW_I64 "i64"
            WHITESPACE " "
            KW_I128 "i128"
            EOF ""
        "#]]);
    }

    // Non-integer type names are keywords too.
    #[test]
    fn lex_more_type_keywords() {
        check_lex("bool field group scalar address signature string record", expect![[r#"
            KW_BOOL "bool"
            WHITESPACE " "
            KW_FIELD "field"
            WHITESPACE " "
            KW_GROUP "group"
            WHITESPACE " "
            KW_SCALAR "scalar"
            WHITESPACE " "
            KW_ADDRESS "address"
            WHITESPACE " "
            KW_SIGNATURE "signature"
            WHITESPACE " "
            KW_STRING "string"
            WHITESPACE " "
            KW_RECORD "record"
            EOF ""
        "#]]);
    }

    // A single-quoted identifier is one IDENT_LIT token, quotes included.
    #[test]
    fn lex_identifier_literal() {
        check_lex("'foo' 'bar_baz' 'x'", expect![[r#"
            IDENT_LIT "'foo'"
            WHITESPACE " "
            IDENT_LIT "'bar_baz'"
            WHITESPACE " "
            IDENT_LIT "'x'"
            EOF ""
        "#]]);
    }

    // The bare word `identifier` is a keyword, distinct from IDENT_LIT.
    #[test]
    fn lex_identifier_keyword() {
        check_lex("identifier", expect![[r#"
            KW_IDENTIFIER "identifier"
            EOF ""
        "#]]);
    }

    // Decimal, hex, binary, and octal literals all lex as INTEGER.
    #[test]
    fn lex_integers() {
        check_lex("123 0xFF 0b101 0o77", expect![[r#"
            INTEGER "123"
            WHITESPACE " "
            INTEGER "0xFF"
            WHITESPACE " "
            INTEGER "0b101"
            WHITESPACE " "
            INTEGER "0o77"
            EOF ""
        "#]]);
    }

    // Underscores are allowed inside integer literals.
    #[test]
    fn lex_integers_with_underscores() {
        check_lex("1_000_000 0xFF_FF", expect![[r#"
            INTEGER "1_000_000"
            WHITESPACE " "
            INTEGER "0xFF_FF"
            EOF ""
        "#]]);
    }

    // Text starting with `aleo1` is an address literal, not an identifier.
    #[test]
    fn lex_address_literal() {
        check_lex("aleo1abc123", expect![[r#"
            ADDRESS_LIT "aleo1abc123"
            EOF ""
        "#]]);
    }

    // String tokens include their surrounding double quotes.
    #[test]
    fn lex_strings() {
        check_lex(r#""hello" "world""#, expect![[r#"
            STRING "\"hello\""
            WHITESPACE " "
            STRING "\"world\""
            EOF ""
        "#]]);
    }

    // Delimiters and punctuation, including the multi-character `::`, `->`, `=>`.
    #[test]
    fn lex_punctuation() {
        check_lex("( ) [ ] { } , . ; : :: ? -> => _ @", expect![[r#"
            L_PAREN "("
            WHITESPACE " "
            R_PAREN ")"
            WHITESPACE " "
            L_BRACKET "["
            WHITESPACE " "
            R_BRACKET "]"
            WHITESPACE " "
            L_BRACE "{"
            WHITESPACE " "
            R_BRACE "}"
            WHITESPACE " "
            COMMA ","
            WHITESPACE " "
            DOT "."
            WHITESPACE " "
            SEMICOLON ";"
            WHITESPACE " "
            COLON ":"
            WHITESPACE " "
            COLON_COLON "::"
            WHITESPACE " "
            QUESTION "?"
            WHITESPACE " "
            ARROW "->"
            WHITESPACE " "
            FAT_ARROW "=>"
            WHITESPACE " "
            UNDERSCORE "_"
            WHITESPACE " "
            AT "@"
            EOF ""
        "#]]);
    }

    // `**` is a single STAR2 token rather than two STARs.
    #[test]
    fn lex_arithmetic_operators() {
        check_lex("+ - * / % **", expect![[r#"
            PLUS "+"
            WHITESPACE " "
            MINUS "-"
            WHITESPACE " "
            STAR "*"
            WHITESPACE " "
            SLASH "/"
            WHITESPACE " "
            PERCENT "%"
            WHITESPACE " "
            STAR2 "**"
            EOF ""
        "#]]);
    }

    // Two-character comparisons win over their one-character prefixes.
    #[test]
    fn lex_comparison_operators() {
        check_lex("== != < <= > >=", expect![[r#"
            EQ2 "=="
            WHITESPACE " "
            BANG_EQ "!="
            WHITESPACE " "
            LT "<"
            WHITESPACE " "
            LT_EQ "<="
            WHITESPACE " "
            GT ">"
            WHITESPACE " "
            GT_EQ ">="
            EOF ""
        "#]]);
    }

    // Logical `&&`, `||`, and `!`.
    #[test]
    fn lex_logical_operators() {
        check_lex("&& || !", expect![[r#"
            AMP2 "&&"
            WHITESPACE " "
            PIPE2 "||"
            WHITESPACE " "
            BANG "!"
            EOF ""
        "#]]);
    }

    // Bitwise operators and shifts.
    #[test]
    fn lex_bitwise_operators() {
        check_lex("& | ^ << >>", expect![[r#"
            AMP "&"
            WHITESPACE " "
            PIPE "|"
            WHITESPACE " "
            CARET "^"
            WHITESPACE " "
            SHL "<<"
            WHITESPACE " "
            SHR ">>"
            EOF ""
        "#]]);
    }

    // Plain and compound assignment, including the three-character forms.
    #[test]
    fn lex_assignment_operators() {
        check_lex("= += -= *= /= %= **= &&= ||=", expect![[r#"
            EQ "="
            WHITESPACE " "
            PLUS_EQ "+="
            WHITESPACE " "
            MINUS_EQ "-="
            WHITESPACE " "
            STAR_EQ "*="
            WHITESPACE " "
            SLASH_EQ "/="
            WHITESPACE " "
            PERCENT_EQ "%="
            WHITESPACE " "
            STAR2_EQ "**="
            WHITESPACE " "
            AMP2_EQ "&&="
            WHITESPACE " "
            PIPE2_EQ "||="
            EOF ""
        "#]]);
    }

    // Bitwise and shift compound assignments.
    #[test]
    fn lex_more_assignment_operators() {
        check_lex("&= |= ^= <<= >>=", expect![[r#"
            AMP_EQ "&="
            WHITESPACE " "
            PIPE_EQ "|="
            WHITESPACE " "
            CARET_EQ "^="
            WHITESPACE " "
            SHL_EQ "<<="
            WHITESPACE " "
            SHR_EQ ">>="
            EOF ""
        "#]]);
    }

    // A range between integers: the dots are not absorbed into the literals.
    #[test]
    fn lex_dot_dot() {
        check_lex("0..10", expect![[r#"
            INTEGER "0"
            DOT_DOT ".."
            INTEGER "10"
            EOF ""
        "#]]);
    }

    // A small arithmetic expression.
    #[test]
    fn lex_simple_expression() {
        check_lex("x + y * 2", expect![[r#"
            IDENT "x"
            WHITESPACE " "
            PLUS "+"
            WHITESPACE " "
            IDENT "y"
            WHITESPACE " "
            STAR "*"
            WHITESPACE " "
            INTEGER "2"
            EOF ""
        "#]]);
    }

    // A call expression with two arguments.
    #[test]
    fn lex_function_call() {
        check_lex("foo(a, b)", expect![[r#"
            IDENT "foo"
            L_PAREN "("
            IDENT "a"
            COMMA ","
            WHITESPACE " "
            IDENT "b"
            R_PAREN ")"
            EOF ""
        "#]]);
    }

    // A function header up to its opening brace.
    #[test]
    fn lex_function_definition() {
        check_lex("fn add(x: u32) -> u32 {", expect![[r#"
            KW_FN "fn"
            WHITESPACE " "
            IDENT "add"
            L_PAREN "("
            IDENT "x"
            COLON ":"
            WHITESPACE " "
            KW_U32 "u32"
            R_PAREN ")"
            WHITESPACE " "
            ARROW "->"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            L_BRACE "{"
            EOF ""
        "#]]);
    }

    // A typed `let` binding.
    #[test]
    fn lex_let_statement() {
        check_lex("let x: u32 = 42;", expect![[r#"
            KW_LET "let"
            WHITESPACE " "
            IDENT "x"
            COLON ":"
            WHITESPACE " "
            KW_U32 "u32"
            WHITESPACE " "
            EQ "="
            WHITESPACE " "
            INTEGER "42"
            SEMICOLON ";"
            EOF ""
        "#]]);
    }

    // An integer type suffix is part of the INTEGER token.
    #[test]
    fn lex_typed_integers() {
        check_lex("1000u32 42i64 0u8 255u128", expect![[r#"
            INTEGER "1000u32"
            WHITESPACE " "
            INTEGER "42i64"
            WHITESPACE " "
            INTEGER "0u8"
            WHITESPACE " "
            INTEGER "255u128"
            EOF ""
        "#]]);
    }

    // `field`, `group`, and `scalar` suffixes are part of the INTEGER token too.
    #[test]
    fn lex_typed_integers_field() {
        check_lex("123field 456group 789scalar", expect![[r#"
            INTEGER "123field"
            WHITESPACE " "
            INTEGER "456group"
            WHITESPACE " "
            INTEGER "789scalar"
            EOF ""
        "#]]);
    }

    // `group::`, `signature::`, and `Future::` paths lex as one IDENT token each.
    #[test]
    fn lex_special_paths() {
        check_lex("group::GEN signature::verify Future::await", expect![[r#"
            IDENT "group::GEN"
            WHITESPACE " "
            IDENT "signature::verify"
            WHITESPACE " "
            IDENT "Future::await"
            EOF ""
        "#]]);
    }

    // A suffixed integer followed directly by `..` still ends at the suffix.
    #[test]
    fn lex_typed_integer_range() {
        check_lex("0u8..STOP", expect![[r#"
            INTEGER "0u8"
            DOT_DOT ".."
            IDENT "STOP"
            EOF ""
        "#]]);
    }

    // An unrecognised character is reported with its exact range.
    #[test]
    fn lex_error_unknown_char() {
        check_lex_errors("hello $ world", expect![[r#"6..7:CouldNotLex { content: "$" }"#]]);
    }

    // The malformed literal stays one INTEGER token (plus EOF); the bad digit
    // is reported as an error instead of splitting the token.
    #[test]
    fn lex_invalid_hex_digit() {
        let (tokens, errors) = lex("0xGAu32");
        assert_eq!(tokens.len(), 2);
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::InvalidDigit { digit: 'G', radix: 16, .. }));
    }

    // `9` is not an octal digit.
    #[test]
    fn lex_invalid_octal_digit() {
        let (_, errors) = lex("0o9u32");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::InvalidDigit { digit: '9', radix: 8, .. }));
    }

    // `2` is not a binary digit.
    #[test]
    fn lex_invalid_binary_digit() {
        let (_, errors) = lex("0b2u32");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::InvalidDigit { digit: '2', radix: 2, .. }));
    }

    // A well-formed hex literal with a suffix produces no errors.
    #[test]
    fn lex_valid_hex_is_ok() {
        let (_, errors) = lex("0xDEADBEEFu64");
        assert!(errors.is_empty());
    }

    // Only the first invalid digit (`g`) is reported.
    #[test]
    fn lex_invalid_hex_lowercase() {
        let (_, errors) = lex("0xghu32");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::InvalidDigit { digit: 'g', radix: 16, .. }));
    }

    // A bidirectional control character in code is reported as BidiOverride.
    #[test]
    fn lex_bidi_override_error() {
        let (_, errors) = lex("let x\u{202E} = 1;");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::BidiOverride));
    }

    // An unterminated block comment is reported but still yields a COMMENT_BLOCK token.
    #[test]
    fn lex_unclosed_block_comment() {
        let (tokens, errors) = lex("/* unclosed");
        assert!(!errors.is_empty());
        assert!(matches!(errors[0].kind, LexErrorKind::CouldNotLex { .. }));
        assert!(tokens.iter().any(|t| t.kind == COMMENT_BLOCK));
    }

    // Block comments do not nest: the first `*/` closes the comment and the
    // inner `/*` is plain comment text, so this input lexes without errors.
    #[test]
    fn lex_nested_comment_not_supported() {
        let (tokens, errors) = lex("/* outer /* inner */");
        assert!(errors.is_empty());
        assert!(tokens.iter().any(|t| t.kind == COMMENT_BLOCK));
    }

    // A properly closed block comment produces no errors.
    #[test]
    fn lex_closed_comment_ok() {
        let (_, errors) = lex("/* closed */");
        assert!(errors.is_empty());
    }
}