use logos::Logos;

21#[derive(Logos, Debug, Clone, PartialEq)]
22pub(crate) enum LexicalToken {
23 #[regex(r"[ \t\r\n]+")]
26 Whitespace,
27 #[regex(r"//[^\n\r]*", allow_greedy = true)]
28 Comment,
29
30 #[token("param")]
32 Param,
33 #[token("node")]
34 Node,
35 #[token("const")]
36 Const,
37 #[token("if")]
38 If,
39 #[token("else")]
40 Else,
41 #[token("true")]
42 True,
43 #[token("false")]
44 False,
45 #[token("base")]
46 Base,
47 #[token("dim")]
48 Dimension,
49 #[token("unit")]
50 Unit,
51 #[token("type")]
52 Type,
53 #[token("index")]
54 Index,
55 #[token("for")]
56 For,
57 #[token("import")]
58 Import,
59 #[token("include")]
60 Include,
61 #[token("dag")]
62 Dag,
63 #[token("match")]
64 Match,
65 #[token("as")]
66 As,
67 #[token("assert")]
68 Assert,
69 #[token("table")]
70 Table,
71 #[token("plot")]
72 Plot,
73 #[token("figure")]
74 Figure,
75 #[token("layer")]
76 Layer,
77 #[token("scan")]
78 Scan,
79 #[token("unfold")]
80 Unfold,
81 #[token("linspace")]
82 Linspace,
83 #[token("step")]
84 Step,
85 #[token("pub")]
86 Pub,
87
88 #[regex(r#""[^"]*""#)]
90 StringLiteral,
91
92 #[token("+")]
94 Plus,
95 #[token("-")]
96 Minus,
97 #[token("*")]
98 Star,
99 #[token("/")]
100 Slash,
101 #[token("^")]
102 Caret,
103 #[token("%")]
104 Percent,
105 #[token("=")]
106 Eq,
107 #[token("==")]
108 EqEq,
109 #[token("!=")]
110 BangEq,
111 #[token("<")]
112 Lt,
113 #[token(">")]
114 Gt,
115 #[token("<=")]
116 LtEq,
117 #[token(">=")]
118 GtEq,
119 #[token("&&")]
120 AmpAmp,
121 #[token("||")]
122 PipePipe,
123 #[token("!")]
124 Bang,
125 #[token("->")]
126 Arrow,
127 #[token("|")]
128 Pipe,
129 #[token("=>")]
130 FatArrow,
131 #[token("~=")]
132 TildeEq,
133 #[token("+/-")]
134 PlusMinus,
135
136 #[token("#")]
138 Hash,
139
140 #[token("(")]
142 LParen,
143 #[token(")")]
144 RParen,
145 #[token("{")]
146 LBrace,
147 #[token("}")]
148 RBrace,
149 #[token("[")]
150 LBracket,
151 #[token("]")]
152 RBracket,
153 #[token(";")]
154 Semicolon,
155 #[token(",")]
156 Comma,
157 #[token("@")]
158 At,
159 #[token(":")]
160 Colon,
161 #[token(".")]
162 Dot,
163
164 #[token("_")]
166 Underscore,
167
168 #[regex(r"[a-zA-Z][a-zA-Z0-9_]*")]
170 Ident,
171
172 #[regex(r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([eE][+-]?[0-9]+)?")]
174 Number,
175}
/// Kinds of lexical trivia: input that is recognized by the lexer but is not a
/// syntax [`Token`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TriviaToken {
    /// A run of whitespace characters.
    Whitespace,
    /// A `//` line comment.
    Comment,
}
183#[derive(Debug, Clone, Copy, PartialEq, Eq)]
184pub(crate) enum LexicalItem {
185 Trivia(TriviaToken),
186 Syntax(Token),
187}
189impl LexicalToken {
190 #[must_use]
191 pub(crate) const fn classify(self) -> LexicalItem {
192 match self {
193 Self::Whitespace => LexicalItem::Trivia(TriviaToken::Whitespace),
194 Self::Comment => LexicalItem::Trivia(TriviaToken::Comment),
195 Self::Param => LexicalItem::Syntax(Token::Param),
196 Self::Node => LexicalItem::Syntax(Token::Node),
197 Self::Const => LexicalItem::Syntax(Token::Const),
198 Self::If => LexicalItem::Syntax(Token::If),
199 Self::Else => LexicalItem::Syntax(Token::Else),
200 Self::True => LexicalItem::Syntax(Token::True),
201 Self::False => LexicalItem::Syntax(Token::False),
202 Self::Base => LexicalItem::Syntax(Token::Base),
203 Self::Dimension => LexicalItem::Syntax(Token::Dimension),
204 Self::Unit => LexicalItem::Syntax(Token::Unit),
205 Self::Type => LexicalItem::Syntax(Token::Type),
206 Self::Index => LexicalItem::Syntax(Token::Index),
207 Self::For => LexicalItem::Syntax(Token::For),
208 Self::Import => LexicalItem::Syntax(Token::Import),
209 Self::Include => LexicalItem::Syntax(Token::Include),
210 Self::Dag => LexicalItem::Syntax(Token::Dag),
211 Self::Match => LexicalItem::Syntax(Token::Match),
212 Self::As => LexicalItem::Syntax(Token::As),
213 Self::Assert => LexicalItem::Syntax(Token::Assert),
214 Self::Table => LexicalItem::Syntax(Token::Table),
215 Self::Plot => LexicalItem::Syntax(Token::Plot),
216 Self::Figure => LexicalItem::Syntax(Token::Figure),
217 Self::Layer => LexicalItem::Syntax(Token::Layer),
218 Self::Scan => LexicalItem::Syntax(Token::Scan),
219 Self::Unfold => LexicalItem::Syntax(Token::Unfold),
220 Self::Linspace => LexicalItem::Syntax(Token::Linspace),
221 Self::Step => LexicalItem::Syntax(Token::Step),
222 Self::Pub => LexicalItem::Syntax(Token::Pub),
223 Self::StringLiteral => LexicalItem::Syntax(Token::StringLiteral),
224 Self::Plus => LexicalItem::Syntax(Token::Plus),
225 Self::Minus => LexicalItem::Syntax(Token::Minus),
226 Self::Star => LexicalItem::Syntax(Token::Star),
227 Self::Slash => LexicalItem::Syntax(Token::Slash),
228 Self::Caret => LexicalItem::Syntax(Token::Caret),
229 Self::Percent => LexicalItem::Syntax(Token::Percent),
230 Self::Eq => LexicalItem::Syntax(Token::Eq),
231 Self::EqEq => LexicalItem::Syntax(Token::EqEq),
232 Self::BangEq => LexicalItem::Syntax(Token::BangEq),
233 Self::Lt => LexicalItem::Syntax(Token::Lt),
234 Self::Gt => LexicalItem::Syntax(Token::Gt),
235 Self::LtEq => LexicalItem::Syntax(Token::LtEq),
236 Self::GtEq => LexicalItem::Syntax(Token::GtEq),
237 Self::AmpAmp => LexicalItem::Syntax(Token::AmpAmp),
238 Self::PipePipe => LexicalItem::Syntax(Token::PipePipe),
239 Self::Bang => LexicalItem::Syntax(Token::Bang),
240 Self::Arrow => LexicalItem::Syntax(Token::Arrow),
241 Self::Pipe => LexicalItem::Syntax(Token::Pipe),
242 Self::FatArrow => LexicalItem::Syntax(Token::FatArrow),
243 Self::TildeEq => LexicalItem::Syntax(Token::TildeEq),
244 Self::PlusMinus => LexicalItem::Syntax(Token::PlusMinus),
245 Self::Hash => LexicalItem::Syntax(Token::Hash),
246 Self::LParen => LexicalItem::Syntax(Token::LParen),
247 Self::RParen => LexicalItem::Syntax(Token::RParen),
248 Self::LBrace => LexicalItem::Syntax(Token::LBrace),
249 Self::RBrace => LexicalItem::Syntax(Token::RBrace),
250 Self::LBracket => LexicalItem::Syntax(Token::LBracket),
251 Self::RBracket => LexicalItem::Syntax(Token::RBracket),
252 Self::Semicolon => LexicalItem::Syntax(Token::Semicolon),
253 Self::Comma => LexicalItem::Syntax(Token::Comma),
254 Self::At => LexicalItem::Syntax(Token::At),
255 Self::Colon => LexicalItem::Syntax(Token::Colon),
256 Self::Dot => LexicalItem::Syntax(Token::Dot),
257 Self::Underscore => LexicalItem::Syntax(Token::Underscore),
258 Self::Ident => LexicalItem::Syntax(Token::Ident),
259 Self::Number => LexicalItem::Syntax(Token::Number),
260 }
261 }
262}
/// Syntax token kinds. Whitespace and comments are not represented here.
///
/// The [`Display`](std::fmt::Display) impl renders each kind as its source
/// spelling, or as a descriptive word for kinds without a fixed spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token {
    // ----- Keywords -----
    /// The `param` keyword.
    Param,
    /// The `node` keyword.
    Node,
    /// The `const` keyword.
    Const,
    /// The `if` keyword.
    If,
    /// The `else` keyword.
    Else,
    /// The `true` keyword.
    True,
    /// The `false` keyword.
    False,
    /// The `base` keyword.
    Base,
    /// The `dim` keyword.
    Dimension,
    /// The `unit` keyword.
    Unit,
    /// The `type` keyword.
    Type,
    /// The `index` keyword.
    Index,
    /// The `for` keyword.
    For,
    /// The `import` keyword.
    Import,
    /// The `include` keyword.
    Include,
    /// The `dag` keyword.
    Dag,
    /// The `match` keyword.
    Match,
    /// The `as` keyword.
    As,
    /// The `assert` keyword.
    Assert,
    /// The `table` keyword.
    Table,
    /// The `plot` keyword.
    Plot,
    /// The `figure` keyword.
    Figure,
    /// The `layer` keyword.
    Layer,
    /// The `scan` keyword.
    Scan,
    /// The `unfold` keyword.
    Unfold,
    /// The `linspace` keyword.
    Linspace,
    /// The `step` keyword.
    Step,
    /// The `pub` keyword.
    Pub,

    // ----- Literals -----
    /// A double-quoted string literal; displayed as `string`.
    StringLiteral,

    // ----- Operators -----
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `^`
    Caret,
    /// `%`
    Percent,
    /// `=`
    Eq,
    /// `==`
    EqEq,
    /// `!=`
    BangEq,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    LtEq,
    /// `>=`
    GtEq,
    /// `&&`
    AmpAmp,
    /// `||`
    PipePipe,
    /// `!`
    Bang,
    /// `->`
    Arrow,
    /// `|`
    Pipe,
    /// `=>`
    FatArrow,
    /// `~=`
    TildeEq,
    /// `+/-`
    PlusMinus,

    // ----- Attribute marker -----
    /// `#`
    Hash,

    // ----- Delimiters and punctuation -----
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// `@`
    At,
    /// `:`
    Colon,
    /// `.`
    Dot,

    // ----- Wildcard -----
    /// `_`
    Underscore,

    // ----- Identifiers -----
    /// An identifier; displayed as `identifier`.
    Ident,

    // ----- Numbers -----
    /// A numeric literal; displayed as `number`.
    Number,
}

impl std::fmt::Display for Token {
    /// Writes the token's source spelling (e.g. `param`, `+/-`, `{`).
    ///
    /// `StringLiteral`, `Ident`, and `Number` have no fixed spelling and are
    /// written as `string`, `identifier`, and `number`. The text is written
    /// verbatim; width, fill, and alignment flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Plain string literals: no format-string escaping is needed, so the
        // braces appear exactly as they are printed.
        let text = match self {
            Self::Param => "param",
            Self::Node => "node",
            Self::Const => "const",
            Self::If => "if",
            Self::Else => "else",
            Self::True => "true",
            Self::False => "false",
            Self::Base => "base",
            Self::Dimension => "dim",
            Self::Unit => "unit",
            Self::Type => "type",
            Self::Index => "index",
            Self::For => "for",
            Self::Import => "import",
            Self::Include => "include",
            Self::Dag => "dag",
            Self::Match => "match",
            Self::As => "as",
            Self::Assert => "assert",
            Self::Table => "table",
            Self::Plot => "plot",
            Self::Figure => "figure",
            Self::Layer => "layer",
            Self::Scan => "scan",
            Self::Unfold => "unfold",
            Self::Linspace => "linspace",
            Self::Step => "step",
            Self::Pub => "pub",
            Self::StringLiteral => "string",
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Star => "*",
            Self::Slash => "/",
            Self::Caret => "^",
            Self::Percent => "%",
            Self::Eq => "=",
            Self::EqEq => "==",
            Self::BangEq => "!=",
            Self::Lt => "<",
            Self::Gt => ">",
            Self::LtEq => "<=",
            Self::GtEq => ">=",
            Self::AmpAmp => "&&",
            Self::PipePipe => "||",
            Self::Bang => "!",
            Self::Arrow => "->",
            Self::Pipe => "|",
            Self::FatArrow => "=>",
            Self::TildeEq => "~=",
            Self::PlusMinus => "+/-",
            Self::Hash => "#",
            Self::LParen => "(",
            Self::RParen => ")",
            Self::LBrace => "{",
            Self::RBrace => "}",
            Self::LBracket => "[",
            Self::RBracket => "]",
            Self::Semicolon => ";",
            Self::Comma => ",",
            Self::At => "@",
            Self::Colon => ":",
            Self::Dot => ".",
            Self::Underscore => "_",
            Self::Ident => "identifier",
            Self::Number => "number",
        };
        f.write_str(text)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the crate lexer over `input` and collects the token kinds it
    /// yields, discarding spans.
    fn lex_tokens(input: &str) -> Vec<Token> {
        let mut lexer = crate::syntax::lexer::Lexer::new(input);
        let mut tokens = Vec::new();
        while let Some((token, _)) = lexer.next_token() {
            tokens.push(token);
        }
        tokens
    }

    /// Asserts that `input` lexes to exactly one token of kind `expected`
    /// whose span covers the whole input.
    fn assert_single_token(input: &str, expected: Token) {
        let mut lexer = crate::syntax::lexer::Lexer::new(input);
        let Some((token, span)) = lexer.next_token() else {
            panic!("expected one token");
        };
        assert_eq!(token, expected);
        assert_eq!(lexer.slice_at(span), input);
        assert_eq!(lexer.next_token(), None);
    }

    #[test]
    fn lex_param_decl() {
        assert_eq!(
            lex_tokens("param dry_mass = 1200.0;"),
            [
                Token::Param,
                Token::Ident,
                Token::Eq,
                Token::Number,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_node_with_graph_ref() {
        assert_eq!(
            lex_tokens("node v_exhaust = @isp * G0;"),
            [
                Token::Node,
                Token::Ident,
                Token::Eq,
                Token::At,
                Token::Ident,
                Token::Star,
                Token::Ident,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_const_decl() {
        assert_eq!(
            lex_tokens("const node g0 = 9.80665;"),
            [
                Token::Const,
                Token::Node,
                Token::Ident,
                Token::Eq,
                Token::Number,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_scientific_notation() {
        assert_single_token("3.98e5", Token::Number);
    }

    #[test]
    fn lex_scientific_notation_negative_exponent() {
        assert_single_token("1e-3", Token::Number);
    }

    #[test]
    fn lex_underscore_separator() {
        assert_single_token("200_000", Token::Number);
    }

    #[test]
    fn lex_underscore_separator_with_decimal() {
        assert_single_token("1_000.5", Token::Number);
    }

    #[test]
    fn lex_integer() {
        assert_single_token("42", Token::Number);
    }

    #[test]
    fn lex_line_comment_skipped() {
        let tokens = lex_tokens("// this is a comment\nparam x = 1.0;");
        assert_eq!(tokens.first(), Some(&Token::Param));
    }

    #[test]
    fn lex_inline_comment_skipped() {
        assert_eq!(
            lex_tokens("param x = 1.0; // inline comment"),
            [
                Token::Param,
                Token::Ident,
                Token::Eq,
                Token::Number,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_if_else() {
        assert_eq!(
            lex_tokens("if true { 1.0 } else { 2.0 }"),
            [
                Token::If,
                Token::True,
                Token::LBrace,
                Token::Number,
                Token::RBrace,
                Token::Else,
                Token::LBrace,
                Token::Number,
                Token::RBrace,
            ]
        );
    }

    #[test]
    fn lex_comparison_operators() {
        assert_eq!(
            lex_tokens("== != < > <= >="),
            [
                Token::EqEq,
                Token::BangEq,
                Token::Lt,
                Token::Gt,
                Token::LtEq,
                Token::GtEq,
            ]
        );
    }

    #[test]
    fn lex_logical_operators() {
        assert_eq!(
            lex_tokens("&& || !"),
            [Token::AmpAmp, Token::PipePipe, Token::Bang]
        );
    }

    #[test]
    fn lex_attribute() {
        assert_eq!(
            lex_tokens("#[lazy]"),
            [Token::Hash, Token::LBracket, Token::Ident, Token::RBracket]
        );
    }

    #[test]
    fn lex_attribute_with_args() {
        assert_eq!(
            lex_tokens("#[assumes(x, y)]"),
            [
                Token::Hash,
                Token::LBracket,
                Token::Ident,
                Token::LParen,
                Token::Ident,
                Token::Comma,
                Token::Ident,
                Token::RParen,
                Token::RBracket,
            ]
        );
    }

    #[test]
    fn lex_function_call() {
        assert_eq!(
            lex_tokens("sqrt(@x)"),
            [
                Token::Ident,
                Token::LParen,
                Token::At,
                Token::Ident,
                Token::RParen,
            ]
        );
    }

    #[test]
    fn lex_upper_ident_pi() {
        assert_single_token("PI", Token::Ident);
    }

    #[test]
    fn lex_booleans() {
        assert_eq!(lex_tokens("true false"), [Token::True, Token::False]);
    }

    #[test]
    fn lex_keywords_not_identifiers() {
        // Spelling and expected kind are kept side by side so a keyword cannot
        // be added to one list and forgotten in the other.
        let keywords = [
            ("param", Token::Param),
            ("node", Token::Node),
            ("const", Token::Const),
            ("if", Token::If),
            ("else", Token::Else),
            ("base", Token::Base),
            ("dim", Token::Dimension),
            ("unit", Token::Unit),
            ("type", Token::Type),
            ("index", Token::Index),
            ("for", Token::For),
            ("import", Token::Import),
            ("include", Token::Include),
            ("dag", Token::Dag),
            ("match", Token::Match),
            ("as", Token::As),
            ("assert", Token::Assert),
            ("table", Token::Table),
            ("plot", Token::Plot),
            ("figure", Token::Figure),
            ("layer", Token::Layer),
            ("scan", Token::Scan),
            ("unfold", Token::Unfold),
            ("linspace", Token::Linspace),
            ("step", Token::Step),
            ("pub", Token::Pub),
        ];
        let source = keywords.map(|(text, _)| text).join(" ");
        let expected = keywords.map(|(_, token)| token);
        assert_eq!(lex_tokens(&source), expected);
    }

    #[test]
    fn lex_identifier_starting_with_new_keywords() {
        // Each word begins with a keyword but must lex as one identifier.
        for word in [
            "baseline",
            "scanner",
            "unfolder",
            "stepped",
            "indexed",
            "indexing",
            "linspaced",
            "tableau",
            "parameter",
            "typedef",
            "importable",
            "dagger",
            "public",
            "included",
            "layered",
        ] {
            assert_single_token(word, Token::Ident);
        }
    }

    #[test]
    fn lex_pascal_case_identifiers() {
        assert_eq!(
            lex_tokens("Length Time Mass Velocity Dimensionless"),
            [Token::Ident; 5]
        );
    }

    #[test]
    fn lex_mixed_case_unit_identifiers() {
        assert_eq!(lex_tokens("Pa Hz kN kPa MPa"), [Token::Ident; 5]);
    }

    #[test]
    fn lex_colon() {
        assert_eq!(
            lex_tokens("param alt: Length = 400 km;"),
            [
                Token::Param,
                Token::Ident,
                Token::Colon,
                Token::Ident,
                Token::Eq,
                Token::Number,
                Token::Ident,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_arrow() {
        assert_eq!(
            lex_tokens("@speed -> km"),
            [Token::At, Token::Ident, Token::Arrow, Token::Ident]
        );
    }

    #[test]
    fn lex_dimension_decl() {
        assert_eq!(
            lex_tokens("dim Velocity = Length / Time;"),
            [
                Token::Dimension,
                Token::Ident,
                Token::Eq,
                Token::Ident,
                Token::Slash,
                Token::Ident,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_unit_decl() {
        assert_eq!(
            lex_tokens("unit km: Length = 1000 m;"),
            [
                Token::Unit,
                Token::Ident,
                Token::Colon,
                Token::Ident,
                Token::Eq,
                Token::Number,
                Token::Ident,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_type_decl() {
        assert_eq!(
            lex_tokens("type TransferResult { TransferResult(dv1: Velocity, dv2: Velocity) }"),
            [
                Token::Type,
                Token::Ident, // TransferResult
                Token::LBrace,
                Token::Ident, // TransferResult
                Token::LParen,
                Token::Ident, // dv1
                Token::Colon,
                Token::Ident, // Velocity
                Token::Comma,
                Token::Ident, // dv2
                Token::Colon,
                Token::Ident, // Velocity
                Token::RParen,
                Token::RBrace,
            ]
        );
    }

    #[test]
    fn lex_dot_field_access() {
        assert_eq!(
            lex_tokens("@transfer.dv1"),
            [Token::At, Token::Ident, Token::Dot, Token::Ident]
        );
    }

    #[test]
    fn lex_import_statement() {
        assert_eq!(
            lex_tokens("import helper.{G0, isp};"),
            [
                Token::Import,
                Token::Ident, // helper
                Token::Dot,
                Token::LBrace,
                Token::Ident, // G0
                Token::Comma,
                Token::Ident, // isp
                Token::RBrace,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_string_literal() {
        assert_single_token(r#""UTC""#, Token::StringLiteral);
    }

    #[test]
    fn lex_use_statement_with_alias() {
        assert_eq!(
            lex_tokens("import f.{x as y};"),
            [
                Token::Import,
                Token::Ident, // f
                Token::Dot,
                Token::LBrace,
                Token::Ident, // x
                Token::As,
                Token::Ident, // y
                Token::RBrace,
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn lex_dag_keyword() {
        assert_eq!(
            lex_tokens("dag my_pipeline {}"),
            [Token::Dag, Token::Ident, Token::LBrace, Token::RBrace]
        );
    }

    #[test]
    fn lex_import_type() {
        assert_eq!(
            lex_tokens("import f.{type T, T};"),
            [
                Token::Import,
                Token::Ident, // f
                Token::Dot,
                Token::LBrace,
                Token::Type,
                Token::Ident, // T
                Token::Comma,
                Token::Ident, // T
                Token::RBrace,
                Token::Semicolon,
            ]
        );
    }
}