1pub use logos::Logos;
4
5use crate::Span;
6use crate::grammar::v1::HeredocContext;
7use crate::grammar::v1::interpolate_dq_string;
8use crate::grammar::v1::interpolate_heredoc;
9use crate::grammar::v1::interpolate_sq_string;
10use crate::parser::Parser;
11use crate::parser::ParserToken;
12use crate::tree::SyntaxKind;
13
14pub fn is_ident(s: &str) -> bool {
16 let mut lexer = Token::lexer(s);
17 if !lexer.next().map(|r| r.is_ok()).unwrap_or(false) {
18 return false;
19 }
20
21 lexer.next().is_none()
22}
23
/// Represents a token for lexing escape sequences within literal text.
///
/// The variants classify each escape so later processing can report precise
/// diagnostics (e.g. distinguishing a well-formed `\xNN` from a truncated
/// one).
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum EscapeToken {
    /// A valid single-character escape sequence.
    #[token(r"\\")]
    #[token(r"\n")]
    #[token(r"\r")]
    #[token(r"\t")]
    #[token(r"\'")]
    #[token(r#"\""#)]
    #[token(r"\~")]
    #[token(r"\$")]
    Valid,

    /// A line continuation: a backslash immediately followed by a
    /// (possibly CRLF) line ending.
    #[regex(r"\\\r?\n")]
    Continuation,

    /// A valid octal escape: a backslash followed by exactly three octal
    /// digits.
    #[regex(r"\\[0-7][0-7][0-7]")]
    ValidOctal,

    /// An invalid octal escape: a backslash followed by a decimal digit
    /// that does not form a full three-octal-digit escape.
    #[regex(r"\\[0-9]")]
    InvalidOctal,

    /// A valid hex escape: `\x` followed by exactly two hex digits.
    #[regex(r"\\x[0-9a-fA-F]{2}")]
    ValidHex,

    /// An invalid hex escape: `\x` without two following hex digits.
    #[regex(r"\\x")]
    InvalidHex,

    /// A valid unicode escape: `\u` with four hex digits or `\U` with
    /// eight.
    #[regex(r"(\\u[0-9a-fA-F]{4})|(\\U[0-9a-fA-F]{8})")]
    ValidUnicode,

    /// An invalid `\u` escape (missing its four hex digits).
    #[token(r"\u")]
    InvalidShortUnicode,

    /// An invalid `\U` escape (missing its eight hex digits).
    #[token(r"\U")]
    InvalidUnicode,

    /// A literal (unescaped) newline character.
    #[token("\n")]
    Newline,

    /// A literal (unescaped) tab character.
    #[token("\t")]
    Tab,

    /// An unknown escape sequence; priority 0 so every specific variant
    /// above wins when it matches.
    #[regex(r"\\.", priority = 0)]
    Unknown,

    /// A run of ordinary text containing no backslash, newline, or tab.
    #[regex(r"[^\\\n\t]+")]
    Text,
}
87
/// Represents a token for lexing the contents of a single-quoted string.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum SQStringToken {
    /// The start of a placeholder (`~{` or `${`).
    #[token("~{")]
    #[token("${")]
    PlaceholderStart,

    /// An escape sequence: a backslash followed by any single character
    /// (including a line ending); escapes are validated elsewhere, not here.
    #[regex(r"\\(\n|\r|.)")]
    Escape,

    /// A run of literal text — anything other than a backslash, `$`, `~`,
    /// or the closing quote.
    #[regex(r"[^\\$~']+")]
    Text,

    /// A `$` that does not start a placeholder.
    #[token("$")]
    DollarSign,

    /// A `~` that does not start a placeholder.
    #[token("~")]
    Tilde,

    /// The closing single quote.
    #[token("'")]
    End,
}
125
/// Represents a token for lexing the contents of a double-quoted string.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum DQStringToken {
    /// The start of a placeholder (`~{` or `${`).
    #[token("~{")]
    #[token("${")]
    PlaceholderStart,

    /// An escape sequence: a backslash followed by any single character
    /// (including a line ending); escapes are validated elsewhere, not here.
    #[regex(r"\\(\n|\r|.)")]
    Escape,

    /// A run of literal text — anything other than a backslash, `$`, `~`,
    /// or the closing quote.
    #[regex(r#"[^\\$~"]+"#)]
    Text,

    /// A `$` that does not start a placeholder.
    #[token("$")]
    DollarSign,

    /// A `~` that does not start a placeholder.
    #[token("~")]
    Tilde,

    /// The closing double quote.
    #[token("\"")]
    End,
}
163
/// Represents a token for lexing the contents of a heredoc
/// (`<<<` ... `>>>`) command or multi-line string.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum HeredocToken {
    /// The start of a `${` placeholder.
    #[token("${")]
    DollarPlaceholderStart,

    /// The start of a `~{` placeholder.
    #[token("~{")]
    TildePlaceholderStart,

    /// An escape sequence: a backslash followed by any single character
    /// (including a line ending); escapes are validated elsewhere, not here.
    #[regex(r"\\(\n|\r|.)")]
    Escape,

    /// A run of literal text — anything other than a backslash, `~`, `$`,
    /// or `>` (so candidate terminators surface as the tokens below).
    #[regex(r"[^\\~$>]+")]
    Text,

    /// A `$` that does not start a placeholder.
    #[token("$")]
    DollarSign,

    /// A `~` that does not start a placeholder.
    #[token("~")]
    Tilde,

    /// A single `>` that does not terminate the heredoc.
    #[token(">")]
    SingleCloseAngle,

    /// A `>>` that does not terminate the heredoc.
    #[token(">>")]
    DoubleCloseAngle,

    /// The `>>>` heredoc terminator.
    #[token(">>>")]
    End,
}
222
/// Represents a token for lexing the contents of a brace-delimited
/// (`command { ... }`) command.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum BraceCommandToken {
    /// The start of a placeholder (`~{` or `${`).
    #[token("~{")]
    #[token("${")]
    PlaceholderStart,

    /// An escape sequence: a backslash followed by any single character
    /// (including a line ending); escapes are validated elsewhere, not here.
    #[regex(r"\\(\n|\r|.)")]
    Escape,

    /// A run of literal text — anything other than a backslash, `$`, `~`,
    /// or the closing brace.
    #[regex(r"[^\\$~}]+")]
    Text,

    /// A `$` that does not start a placeholder.
    #[token("$")]
    DollarSign,

    /// A `~` that does not start a placeholder.
    #[token("~")]
    Tilde,

    /// The closing brace that terminates the command.
    #[token("}")]
    End,
}
260
/// Represents a WDL (v1) token produced by the primary lexer.
///
/// String, heredoc, and command contents are not lexed with this enum; the
/// lexer morphs to the dedicated token types above after producing the
/// corresponding opening token.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
#[logos(subpattern exp = r"[eE][+-]?[0-9]+")]
#[logos(subpattern id = r"[a-zA-Z][a-zA-Z0-9_]*")]
pub enum Token {
    /// A run of whitespace (spaces, tabs, carriage returns, newlines).
    #[regex(r"[ \t\r\n]+")]
    Whitespace,

    /// A `#` comment extending to the end of the line.
    #[regex(r"#[^\r\n]*")]
    Comment,

    /// A float literal; the explicit priority lets `123.` win over an
    /// integer literal followed by a dot.
    #[regex(r"[0-9]+(?&exp)")]
    #[regex(r"[0-9]+\.[0-9]*(?&exp)?", priority = 5)]
    #[regex(r"[0-9]*\.[0-9]+(?&exp)?")]
    Float,

    /// An integer literal: decimal, octal (leading `0`), or hex
    /// (`0x`/`0X`).
    #[token("0")]
    #[regex(r"[1-9][0-9]*")]
    #[regex(r"0[0-7]+")]
    #[regex(r"0[xX][0-9a-fA-F]+")]
    Integer,

    /// An identifier (letter followed by letters, digits, or underscores).
    #[regex(r"(?&id)")]
    Ident,

    /// The opening quote of a single-quoted string; the lexer morphs to
    /// [`SQStringToken`] after this token.
    #[token("'")]
    SingleQuote,

    /// The opening quote of a double-quoted string; the lexer morphs to
    /// [`DQStringToken`] after this token.
    #[token("\"")]
    DoubleQuote,

    /// The `<<<` that opens a heredoc; the lexer morphs to
    /// [`HeredocToken`] after this token.
    #[token("<<<")]
    OpenHeredoc,
    /// The `>>>` that closes a heredoc.
    #[token(">>>")]
    CloseHeredoc,

    /// The `Array` type keyword.
    #[token("Array")]
    ArrayTypeKeyword,
    /// The `Boolean` type keyword.
    #[token("Boolean")]
    BooleanTypeKeyword,
    /// The `Directory` type keyword.
    #[token("Directory")]
    DirectoryTypeKeyword,
    /// The `File` type keyword.
    #[token("File")]
    FileTypeKeyword,
    /// The `Float` type keyword.
    #[token("Float")]
    FloatTypeKeyword,
    /// The `Int` type keyword.
    #[token("Int")]
    IntTypeKeyword,
    /// The `Map` type keyword.
    #[token("Map")]
    MapTypeKeyword,
    /// The `Object` type keyword.
    #[token("Object")]
    ObjectTypeKeyword,
    /// The `Pair` type keyword.
    #[token("Pair")]
    PairTypeKeyword,
    /// The `String` type keyword.
    #[token("String")]
    StringTypeKeyword,
    /// The `after` keyword.
    #[token("after")]
    AfterKeyword,
    /// The `alias` keyword.
    #[token("alias")]
    AliasKeyword,
    /// The `as` keyword.
    #[token("as")]
    AsKeyword,
    /// The `call` keyword.
    #[token("call")]
    CallKeyword,
    /// The `command` keyword.
    #[token("command")]
    CommandKeyword,
    /// The `else` keyword.
    #[token("else")]
    ElseKeyword,
    /// The `env` keyword.
    #[token("env")]
    EnvKeyword,
    /// The `false` keyword.
    #[token("false")]
    FalseKeyword,
    /// The `hints` keyword.
    #[token("hints")]
    HintsKeyword,
    /// The `if` keyword.
    #[token("if")]
    IfKeyword,
    /// The `in` keyword.
    #[token("in")]
    InKeyword,
    /// The `import` keyword.
    #[token("import")]
    ImportKeyword,
    /// The `input` keyword.
    #[token("input")]
    InputKeyword,
    /// The `meta` keyword.
    #[token("meta")]
    MetaKeyword,
    /// The `None` keyword.
    #[token("None")]
    NoneKeyword,
    /// The `null` keyword.
    #[token("null")]
    NullKeyword,
    /// The `object` keyword.
    #[token("object")]
    ObjectKeyword,
    /// The `output` keyword.
    #[token("output")]
    OutputKeyword,
    /// The `parameter_meta` keyword.
    #[token("parameter_meta")]
    ParameterMetaKeyword,
    /// The `requirements` keyword.
    #[token("requirements")]
    RequirementsKeyword,
    /// The `runtime` keyword.
    #[token("runtime")]
    RuntimeKeyword,
    /// The `scatter` keyword.
    #[token("scatter")]
    ScatterKeyword,
    /// The `struct` keyword.
    #[token("struct")]
    StructKeyword,
    /// The `task` keyword.
    #[token("task")]
    TaskKeyword,
    /// The `then` keyword.
    #[token("then")]
    ThenKeyword,
    /// The `true` keyword.
    #[token("true")]
    TrueKeyword,
    /// The `version` keyword.
    #[token("version")]
    VersionKeyword,
    /// The `workflow` keyword.
    #[token("workflow")]
    WorkflowKeyword,

    /// The `{` symbol.
    #[token("{")]
    OpenBrace,
    /// The `}` symbol.
    #[token("}")]
    CloseBrace,
    /// The `[` symbol.
    #[token("[")]
    OpenBracket,
    /// The `]` symbol.
    #[token("]")]
    CloseBracket,
    /// The `=` symbol.
    #[token("=")]
    Assignment,
    /// The `:` symbol.
    #[token(":")]
    Colon,
    /// The `,` symbol.
    #[token(",")]
    Comma,
    /// The `(` symbol.
    #[token("(")]
    OpenParen,
    /// The `)` symbol.
    #[token(")")]
    CloseParen,
    /// The `?` symbol.
    #[token("?")]
    QuestionMark,
    /// The `!` symbol.
    #[token("!")]
    Exclamation,
    /// The `+` symbol.
    #[token("+")]
    Plus,
    /// The `-` symbol.
    #[token("-")]
    Minus,
    /// The `||` symbol.
    #[token("||")]
    LogicalOr,
    /// The `&&` symbol.
    #[token("&&")]
    LogicalAnd,
    /// The `*` symbol.
    #[token("*")]
    Asterisk,
    /// The `**` symbol.
    #[token("**")]
    Exponentiation,
    /// The `/` symbol.
    #[token("/")]
    Slash,
    /// The `%` symbol.
    #[token("%")]
    Percent,
    /// The `==` symbol.
    #[token("==")]
    Equal,
    /// The `!=` symbol.
    #[token("!=")]
    NotEqual,
    /// The `<=` symbol.
    #[token("<=")]
    LessEqual,
    /// The `>=` symbol.
    #[token(">=")]
    GreaterEqual,
    /// The `<` symbol.
    #[token("<")]
    Less,
    /// The `>` symbol.
    #[token(">")]
    Greater,
    /// The `.` symbol.
    #[token(".")]
    Dot,

    /// A marker for the number of token variants; this variant is never
    /// produced by the lexer and MUST always be listed last.
    MAX,
}
534
// Compile-time guard on the number of token variants: raw token values are
// round-tripped through `u8` (see `into_raw`/`from_raw` below), and the
// count must stay within this bound. NOTE(review): the `128` limit
// presumably mirrors a reservation in the parser's raw-token encoding —
// confirm against `ParserToken`/`SyntaxKind` before changing it.
const _: () = assert!(Token::MAX as u8 <= 128);
537
impl<'a> ParserToken<'a> for Token {
    /// Maps each lexer token to its corresponding [`SyntaxKind`].
    ///
    /// The match is intentionally exhaustive (no catch-all) so adding a
    /// token variant is a compile error until it is mapped here.
    fn into_syntax(self) -> SyntaxKind {
        match self {
            Self::Whitespace => SyntaxKind::Whitespace,
            Self::Comment => SyntaxKind::Comment,
            Self::Float => SyntaxKind::Float,
            Self::Integer => SyntaxKind::Integer,
            Self::Ident => SyntaxKind::Ident,
            Self::SingleQuote => SyntaxKind::SingleQuote,
            Self::DoubleQuote => SyntaxKind::DoubleQuote,
            Self::OpenHeredoc => SyntaxKind::OpenHeredoc,
            Self::CloseHeredoc => SyntaxKind::CloseHeredoc,
            Self::ArrayTypeKeyword => SyntaxKind::ArrayTypeKeyword,
            Self::BooleanTypeKeyword => SyntaxKind::BooleanTypeKeyword,
            Self::DirectoryTypeKeyword => SyntaxKind::DirectoryTypeKeyword,
            Self::FileTypeKeyword => SyntaxKind::FileTypeKeyword,
            Self::FloatTypeKeyword => SyntaxKind::FloatTypeKeyword,
            Self::IntTypeKeyword => SyntaxKind::IntTypeKeyword,
            Self::MapTypeKeyword => SyntaxKind::MapTypeKeyword,
            Self::ObjectTypeKeyword => SyntaxKind::ObjectTypeKeyword,
            Self::PairTypeKeyword => SyntaxKind::PairTypeKeyword,
            Self::StringTypeKeyword => SyntaxKind::StringTypeKeyword,
            Self::AfterKeyword => SyntaxKind::AfterKeyword,
            Self::AliasKeyword => SyntaxKind::AliasKeyword,
            Self::AsKeyword => SyntaxKind::AsKeyword,
            Self::CallKeyword => SyntaxKind::CallKeyword,
            Self::CommandKeyword => SyntaxKind::CommandKeyword,
            Self::ElseKeyword => SyntaxKind::ElseKeyword,
            Self::EnvKeyword => SyntaxKind::EnvKeyword,
            Self::FalseKeyword => SyntaxKind::FalseKeyword,
            Self::HintsKeyword => SyntaxKind::HintsKeyword,
            Self::IfKeyword => SyntaxKind::IfKeyword,
            Self::InKeyword => SyntaxKind::InKeyword,
            Self::ImportKeyword => SyntaxKind::ImportKeyword,
            Self::InputKeyword => SyntaxKind::InputKeyword,
            Self::MetaKeyword => SyntaxKind::MetaKeyword,
            Self::NoneKeyword => SyntaxKind::NoneKeyword,
            Self::NullKeyword => SyntaxKind::NullKeyword,
            Self::ObjectKeyword => SyntaxKind::ObjectKeyword,
            Self::OutputKeyword => SyntaxKind::OutputKeyword,
            Self::ParameterMetaKeyword => SyntaxKind::ParameterMetaKeyword,
            Self::RequirementsKeyword => SyntaxKind::RequirementsKeyword,
            Self::RuntimeKeyword => SyntaxKind::RuntimeKeyword,
            Self::ScatterKeyword => SyntaxKind::ScatterKeyword,
            Self::StructKeyword => SyntaxKind::StructKeyword,
            Self::TaskKeyword => SyntaxKind::TaskKeyword,
            Self::ThenKeyword => SyntaxKind::ThenKeyword,
            Self::TrueKeyword => SyntaxKind::TrueKeyword,
            Self::VersionKeyword => SyntaxKind::VersionKeyword,
            Self::WorkflowKeyword => SyntaxKind::WorkflowKeyword,
            Self::OpenBrace => SyntaxKind::OpenBrace,
            Self::CloseBrace => SyntaxKind::CloseBrace,
            Self::OpenBracket => SyntaxKind::OpenBracket,
            Self::CloseBracket => SyntaxKind::CloseBracket,
            Self::Assignment => SyntaxKind::Assignment,
            Self::Colon => SyntaxKind::Colon,
            Self::Comma => SyntaxKind::Comma,
            Self::OpenParen => SyntaxKind::OpenParen,
            Self::CloseParen => SyntaxKind::CloseParen,
            Self::QuestionMark => SyntaxKind::QuestionMark,
            Self::Exclamation => SyntaxKind::Exclamation,
            Self::Plus => SyntaxKind::Plus,
            Self::Minus => SyntaxKind::Minus,
            Self::LogicalOr => SyntaxKind::LogicalOr,
            Self::LogicalAnd => SyntaxKind::LogicalAnd,
            Self::Asterisk => SyntaxKind::Asterisk,
            Self::Exponentiation => SyntaxKind::Exponentiation,
            Self::Slash => SyntaxKind::Slash,
            Self::Percent => SyntaxKind::Percent,
            Self::Equal => SyntaxKind::Equal,
            Self::NotEqual => SyntaxKind::NotEqual,
            Self::LessEqual => SyntaxKind::LessEqual,
            Self::GreaterEqual => SyntaxKind::GreaterEqual,
            Self::Less => SyntaxKind::Less,
            Self::Greater => SyntaxKind::Greater,
            Self::Dot => SyntaxKind::Dot,
            // `MAX` is a count marker and is never produced by the lexer.
            Self::MAX => unreachable!(),
        }
    }

    /// Converts the token into its raw `u8` representation.
    fn into_raw(self) -> u8 {
        self as u8
    }

    /// Converts a raw value back into a token.
    ///
    /// # Panics
    ///
    /// Panics if `token` is not a valid raw token value (i.e. not less
    /// than `Token::MAX`).
    fn from_raw(token: u8) -> Self {
        assert!(token < Self::MAX as u8, "invalid token value");
        // SAFETY: `Token` is `repr(u8)` with contiguous discriminants
        // starting at zero, and the assertion above guarantees `token` is
        // strictly less than `MAX`, so it is a valid discriminant.
        unsafe { std::mem::transmute(token) }
    }

    /// Returns a human-readable description of the token for use in
    /// diagnostics.
    fn describe(self) -> &'static str {
        match self {
            Self::Whitespace => "whitespace",
            Self::Comment => "comment",
            Self::Float => "float",
            Self::Integer => "integer",
            Self::Ident => "identifier",
            Self::SingleQuote | Self::DoubleQuote => "string",
            Self::OpenHeredoc => "multi-line string",
            Self::CloseHeredoc => "`>>>`",
            Self::ArrayTypeKeyword => "`Array` keyword",
            Self::BooleanTypeKeyword => "`Boolean` keyword",
            Self::DirectoryTypeKeyword => "`Directory` keyword",
            Self::FileTypeKeyword => "`File` keyword",
            Self::FloatTypeKeyword => "`Float` keyword",
            Self::IntTypeKeyword => "`Int` keyword",
            Self::MapTypeKeyword => "`Map` keyword",
            Self::ObjectTypeKeyword => "`Object` keyword",
            Self::PairTypeKeyword => "`Pair` keyword",
            Self::StringTypeKeyword => "`String` keyword",
            Self::AfterKeyword => "`after` keyword",
            Self::AliasKeyword => "`alias` keyword",
            Self::AsKeyword => "`as` keyword",
            Self::CallKeyword => "`call` keyword",
            Self::CommandKeyword => "`command` keyword",
            Self::ElseKeyword => "`else` keyword",
            Self::EnvKeyword => "`env` keyword",
            Self::FalseKeyword => "`false` keyword",
            Self::HintsKeyword => "`hints` keyword",
            Self::IfKeyword => "`if` keyword",
            Self::InKeyword => "`in` keyword",
            Self::ImportKeyword => "`import` keyword",
            Self::InputKeyword => "`input` keyword",
            Self::MetaKeyword => "`meta` keyword",
            Self::NoneKeyword => "`None` keyword",
            Self::NullKeyword => "`null` keyword",
            Self::ObjectKeyword => "`object` keyword",
            Self::OutputKeyword => "`output` keyword",
            Self::ParameterMetaKeyword => "`parameter_meta` keyword",
            Self::RequirementsKeyword => "`requirements` keyword",
            Self::RuntimeKeyword => "`runtime` keyword",
            Self::ScatterKeyword => "`scatter` keyword",
            Self::StructKeyword => "`struct` keyword",
            Self::TaskKeyword => "`task` keyword",
            Self::ThenKeyword => "`then` keyword",
            Self::TrueKeyword => "`true` keyword",
            Self::VersionKeyword => "`version` keyword",
            Self::WorkflowKeyword => "`workflow` keyword",
            Self::OpenBrace => "`{`",
            Self::CloseBrace => "`}`",
            Self::OpenBracket => "`[`",
            Self::CloseBracket => "`]`",
            Self::Assignment => "`=`",
            Self::Colon => "`:`",
            Self::Comma => "`,`",
            Self::OpenParen => "`(`",
            Self::CloseParen => "`)`",
            Self::QuestionMark => "`?`",
            Self::Exclamation => "`!`",
            Self::Plus => "`+`",
            Self::Minus => "`-`",
            Self::LogicalOr => "`||`",
            Self::LogicalAnd => "`&&`",
            Self::Asterisk => "`*`",
            Self::Exponentiation => "`**`",
            Self::Slash => "`/`",
            Self::Percent => "`%`",
            Self::Equal => "`==`",
            Self::NotEqual => "`!=`",
            Self::LessEqual => "`<=`",
            Self::GreaterEqual => "`>=`",
            Self::Less => "`<`",
            Self::Greater => "`>`",
            Self::Dot => "`.`",
            // `MAX` is a count marker and is never produced by the lexer.
            Self::MAX => unreachable!(),
        }
    }

    /// Whitespace and comments are trivia for the parser.
    fn is_trivia(self) -> bool {
        matches!(self, Self::Whitespace | Self::Comment)
    }

    /// Attempts error recovery when one of the string/heredoc opening
    /// tokens is encountered: the remainder of the string is interpolated
    /// (consumed) so lexing can resume afterwards.
    ///
    /// Returns `true` if recovery was performed; any interpolation error is
    /// reported as a diagnostic rather than propagated.
    fn recover_interpolation(self, start: Span, parser: &mut Parser<'a, Self>) -> bool {
        match self {
            Self::SingleQuote => {
                if let Err(e) = parser.interpolate(|i| interpolate_sq_string(start, true, i)) {
                    parser.diagnostic(e);
                }
                true
            }
            Self::DoubleQuote => {
                if let Err(e) = parser.interpolate(|i| interpolate_dq_string(start, true, i)) {
                    parser.diagnostic(e);
                }
                true
            }
            Self::OpenHeredoc => {
                if let Err(e) =
                    parser.interpolate(|i| interpolate_heredoc(start, HeredocContext::String, i))
                {
                    parser.diagnostic(e);
                }
                true
            }
            _ => {
                // All other tokens do not begin interpolated content.
                false
            }
        }
    }
}
738
/// Tests for the v1 lexer.
///
/// Span expectations are exact byte offsets into each input; the string
/// tests drive the `morph` transitions between the main token type and the
/// string/heredoc/command sub-lexers by hand.
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;
    use crate::lexer::Lexer;
    use crate::lexer::test::map;

    /// A run of mixed whitespace lexes as a single `Whitespace` token.
    #[test]
    pub fn whitespace() {
        let lexer = Lexer::<Token>::new(" \t\r\n");
        let tokens: Vec<_> = lexer.map(map).collect();
        assert_eq!(
            tokens,
            &[(Ok(Token::Whitespace), 0..4)],
            "produced tokens did not match the expected set"
        );
    }

    /// Comments run from `#` to end of line, regardless of how many `#`s
    /// start them.
    #[test]
    fn comments() {
        use Token::*;
        let lexer = Lexer::<Token>::new(
            r#"
## first comment
# second comment
#### third comment"#,
        );
        let tokens: Vec<_> = lexer.map(map).collect();
        assert_eq!(
            tokens,
            &[
                (Ok(Whitespace), 0..1),
                (Ok(Comment), 1..17),
                (Ok(Whitespace), 17..18),
                (Ok(Comment), 18..34),
                (Ok(Whitespace), 34..35),
                (Ok(Comment), 35..53)
            ],
            "produced tokens did not match the expected set"
        );
    }

    /// Exercises every float literal form: leading/trailing-dot decimals
    /// and each exponent spelling (`e`/`E`, optional sign).
    #[test]
    fn float() {
        use Token::*;
        let lexer = Lexer::<Token>::new(
            r#"
0.
0.0
.0
.123
0.123
123.0
123.123
123e123
123E123
123e+123
123E+123
123e-123
123E-123
123.e123
123.E123
123.e+123
123.E+123
123.e-123
123.E-123
.123e+123
.123E+123
.123e-123
.123E-123
0.123e+123
0.123E+123
0.123e-123
0.123E-123
123.123e123
123.123E123
123.123e+123
123.123E+123
123.123e-123
123.123E-123"#,
        );

        let tokens: Vec<_> = lexer.map(map).collect();
        assert_eq!(
            tokens,
            &[
                (Ok(Whitespace), 0..1),
                (Ok(Float), 1..3),
                (Ok(Whitespace), 3..4),
                (Ok(Float), 4..7),
                (Ok(Whitespace), 7..8),
                (Ok(Float), 8..10),
                (Ok(Whitespace), 10..11),
                (Ok(Float), 11..15),
                (Ok(Whitespace), 15..16),
                (Ok(Float), 16..21),
                (Ok(Whitespace), 21..22),
                (Ok(Float), 22..27),
                (Ok(Whitespace), 27..28),
                (Ok(Float), 28..35),
                (Ok(Whitespace), 35..36),
                (Ok(Float), 36..43),
                (Ok(Whitespace), 43..44),
                (Ok(Float), 44..51),
                (Ok(Whitespace), 51..52),
                (Ok(Float), 52..60),
                (Ok(Whitespace), 60..61),
                (Ok(Float), 61..69),
                (Ok(Whitespace), 69..70),
                (Ok(Float), 70..78),
                (Ok(Whitespace), 78..79),
                (Ok(Float), 79..87),
                (Ok(Whitespace), 87..88),
                (Ok(Float), 88..96),
                (Ok(Whitespace), 96..97),
                (Ok(Float), 97..105),
                (Ok(Whitespace), 105..106),
                (Ok(Float), 106..115),
                (Ok(Whitespace), 115..116),
                (Ok(Float), 116..125),
                (Ok(Whitespace), 125..126),
                (Ok(Float), 126..135),
                (Ok(Whitespace), 135..136),
                (Ok(Float), 136..145),
                (Ok(Whitespace), 145..146),
                (Ok(Float), 146..155),
                (Ok(Whitespace), 155..156),
                (Ok(Float), 156..165),
                (Ok(Whitespace), 165..166),
                (Ok(Float), 166..175),
                (Ok(Whitespace), 175..176),
                (Ok(Float), 176..185),
                (Ok(Whitespace), 185..186),
                (Ok(Float), 186..196),
                (Ok(Whitespace), 196..197),
                (Ok(Float), 197..207),
                (Ok(Whitespace), 207..208),
                (Ok(Float), 208..218),
                (Ok(Whitespace), 218..219),
                (Ok(Float), 219..229),
                (Ok(Whitespace), 229..230),
                (Ok(Float), 230..241),
                (Ok(Whitespace), 241..242),
                (Ok(Float), 242..253),
                (Ok(Whitespace), 253..254),
                (Ok(Float), 254..266),
                (Ok(Whitespace), 266..267),
                (Ok(Float), 267..279),
                (Ok(Whitespace), 279..280),
                (Ok(Float), 280..292),
                (Ok(Whitespace), 292..293),
                (Ok(Float), 293..305),
            ],
        );
    }

    /// Exercises decimal, octal (leading `0`), and hex (`0x`/`0X`)
    /// integer literal forms.
    #[test]
    fn integer() {
        use Token::*;
        let lexer = Lexer::<Token>::new(
            r#"
0
5
123456789
01234567
0000
0777
0x0
0X0
0x123456789ABCDEF"#,
        );
        let tokens: Vec<_> = lexer.map(map).collect();
        assert_eq!(
            tokens,
            &[
                (Ok(Whitespace), 0..1),
                (Ok(Integer), 1..2),
                (Ok(Whitespace), 2..3),
                (Ok(Integer), 3..4),
                (Ok(Whitespace), 4..5),
                (Ok(Integer), 5..14),
                (Ok(Whitespace), 14..15),
                (Ok(Integer), 15..23),
                (Ok(Whitespace), 23..24),
                (Ok(Integer), 24..28),
                (Ok(Whitespace), 28..29),
                (Ok(Integer), 29..33),
                (Ok(Whitespace), 33..34),
                (Ok(Integer), 34..37),
                (Ok(Whitespace), 37..38),
                (Ok(Integer), 38..41),
                (Ok(Whitespace), 41..42),
                (Ok(Integer), 42..59),
            ],
        );
    }

    /// Identifiers may mix case, digits, and underscores after a leading
    /// letter.
    #[test]
    fn ident() {
        use Token::*;

        let lexer = Lexer::<Token>::new(
            r#"
foo
Foo123
F_B
f_b
foo_Bar123
foo0123_bar0123_baz0123
foo123_BAR"#,
        );
        let tokens: Vec<_> = lexer.map(map).collect();
        assert_eq!(
            tokens,
            &[
                (Ok(Whitespace), 0..1),
                (Ok(Ident), 1..4),
                (Ok(Whitespace), 4..5),
                (Ok(Ident), 5..11),
                (Ok(Whitespace), 11..12),
                (Ok(Ident), 12..15),
                (Ok(Whitespace), 15..16),
                (Ok(Ident), 16..19),
                (Ok(Whitespace), 19..20),
                (Ok(Ident), 20..30),
                (Ok(Whitespace), 30..31),
                (Ok(Ident), 31..54),
                (Ok(Whitespace), 54..55),
                (Ok(Ident), 55..65),
            ],
        );
    }

    /// Drives the `Token` <-> `SQStringToken` morphs through a string with
    /// nested placeholders, escapes, and bare `~`/`$` characters.
    #[test]
    fn single_quote_string() {
        let mut lexer = Lexer::<Token>::new(r#"'hello \'~{name}${'!'}\': not \~{a var~$}'"#);
        assert_eq!(lexer.next().map(map), Some((Ok(Token::SingleQuote), 0..1)));

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(SQStringToken::Text), 1..7)));
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::Escape), 7..9))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::PlaceholderStart), 9..11))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::Ident), 11..15)));
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 15..16)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::PlaceholderStart), 16..18))
        );

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(Token::SingleQuote), 18..19))
        );

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::Text), 19..20))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::End), 20..21))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 21..22)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::Escape), 22..24))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::Text), 24..30))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::Escape), 30..32))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::Text), 32..38))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::Tilde), 38..39))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::DollarSign), 39..40))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::Text), 40..41))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(SQStringToken::End), 41..42))
        );

        let mut lexer = lexer.morph::<Token>();
        assert_eq!(lexer.next().map(map), None);
    }

    /// Mirror of `single_quote_string` for double-quoted strings and
    /// `DQStringToken`.
    #[test]
    fn double_quote_string() {
        let mut lexer = Lexer::<Token>::new(r#""hello \"~{name}${"!"}\": not \~{a var~$}""#);
        assert_eq!(lexer.next().map(map), Some((Ok(Token::DoubleQuote), 0..1)));

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(DQStringToken::Text), 1..7)));
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Escape), 7..9))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::PlaceholderStart), 9..11))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::Ident), 11..15)));
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 15..16)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::PlaceholderStart), 16..18))
        );

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(Token::DoubleQuote), 18..19))
        );

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Text), 19..20))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::End), 20..21))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 21..22)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Escape), 22..24))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Text), 24..30))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Escape), 30..32))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Text), 32..38))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Tilde), 38..39))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::DollarSign), 39..40))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Text), 40..41))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::End), 41..42))
        );

        let mut lexer = lexer.morph::<Token>();
        assert_eq!(lexer.next().map(map), None);
    }

    /// Heredoc lexing: `~{`/`${` placeholders, an escaped `\>>>` that does
    /// not terminate, a `>>` that is not a terminator, and the real `>>>`.
    #[test]
    fn heredoc() {
        let mut lexer = Lexer::<Token>::new(
            r#"<<<
   printf "~{message}"
   printf "${var}"
   printf ~{"this should not close >>>"}
   printf "\~{escaped}"
   \>>>
   still in heredoc~
>>>"#,
        );
        assert_eq!(lexer.next().map(map), Some((Ok(Token::OpenHeredoc), 0..3)));

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(HeredocToken::Text), 3..15)));
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::TildePlaceholderStart), 15..17))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::Ident), 17..24)));
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 24..25)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Text), 25..38))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::DollarPlaceholderStart), 38..40))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::Ident), 40..43)));
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 43..44)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Text), 44..56))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::TildePlaceholderStart), 56..58))
        );

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(Token::DoubleQuote), 58..59))
        );

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Text), 59..84))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::End), 84..85))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 85..86)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Text), 86..98))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Escape), 98..100))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Text), 100..114))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Escape), 114..116))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::DoubleCloseAngle), 116..118))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Text), 118..138))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Tilde), 138..139))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::Text), 139..140))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(HeredocToken::End), 140..143))
        );

        let mut lexer = lexer.morph::<Token>();
        assert_eq!(lexer.next().map(map), None);
    }

    /// Brace command lexing: placeholders, escaped braces that do not
    /// terminate the command, and the real closing `}`.
    #[test]
    fn brace_command() {
        let mut lexer = Lexer::<Token>::new(
            r#"command {
   printf "~{message}"
   printf "${var}"
   printf ~{"this should not close }"}
   printf "\~{escaped\}"
   printf "\${also escaped\}"
   printf "still in command$~"
}"#,
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(Token::CommandKeyword), 0..7)),
        );
        assert_eq!(lexer.next().map(map), Some((Ok(Token::Whitespace), 7..8)));
        assert_eq!(lexer.next().map(map), Some((Ok(Token::OpenBrace), 8..9)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 9..21))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::PlaceholderStart), 21..23))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::Ident), 23..30)));
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 30..31)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 31..44))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::PlaceholderStart), 44..46))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::Ident), 46..49)));
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 49..50)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 50..62))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::PlaceholderStart), 62..64))
        );

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(Token::DoubleQuote), 64..65))
        );

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::Text), 65..88))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(DQStringToken::End), 88..89))
        );

        let mut lexer = lexer.morph();
        assert_eq!(lexer.next().map(map), Some((Ok(Token::CloseBrace), 89..90)));

        let mut lexer = lexer.morph();
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 90..102))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Escape), 102..104))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 104..112))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Escape), 112..114))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 114..127))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Escape), 127..129))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 129..142))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Escape), 142..144))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 144..173))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::DollarSign), 173..174))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Tilde), 174..175))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::Text), 175..177))
        );
        assert_eq!(
            lexer.next().map(map),
            Some((Ok(BraceCommandToken::End), 177..178))
        );

        let mut lexer = lexer.morph::<Token>();
        assert_eq!(lexer.next().map(map), None);
    }

    /// Every keyword lexes to its dedicated token rather than `Ident`.
    #[test]
    fn keywords() {
        use Token::*;

        let lexer = Lexer::<Token>::new(
            r#"
Array
Boolean
Directory
File
Float
Int
Map
None
Object
Pair
String
after
alias
as
call
command
else
false
if
hints
in
import
input
meta
null
object
output
parameter_meta
requirements
runtime
scatter
struct
task
then
true
version
workflow
env"#,
        );
        let tokens: Vec<_> = lexer.map(map).collect();
        assert_eq!(
            tokens,
            &[
                (Ok(Whitespace), 0..1),
                (Ok(ArrayTypeKeyword), 1..6),
                (Ok(Whitespace), 6..7),
                (Ok(BooleanTypeKeyword), 7..14),
                (Ok(Whitespace), 14..15),
                (Ok(DirectoryTypeKeyword), 15..24),
                (Ok(Whitespace), 24..25),
                (Ok(FileTypeKeyword), 25..29),
                (Ok(Whitespace), 29..30),
                (Ok(FloatTypeKeyword), 30..35),
                (Ok(Whitespace), 35..36),
                (Ok(IntTypeKeyword), 36..39),
                (Ok(Whitespace), 39..40),
                (Ok(MapTypeKeyword), 40..43),
                (Ok(Whitespace), 43..44),
                (Ok(NoneKeyword), 44..48),
                (Ok(Whitespace), 48..49),
                (Ok(ObjectTypeKeyword), 49..55),
                (Ok(Whitespace), 55..56),
                (Ok(PairTypeKeyword), 56..60),
                (Ok(Whitespace), 60..61),
                (Ok(StringTypeKeyword), 61..67),
                (Ok(Whitespace), 67..68),
                (Ok(AfterKeyword), 68..73),
                (Ok(Whitespace), 73..74),
                (Ok(AliasKeyword), 74..79),
                (Ok(Whitespace), 79..80),
                (Ok(AsKeyword), 80..82),
                (Ok(Whitespace), 82..83),
                (Ok(CallKeyword), 83..87),
                (Ok(Whitespace), 87..88),
                (Ok(CommandKeyword), 88..95),
                (Ok(Whitespace), 95..96),
                (Ok(ElseKeyword), 96..100),
                (Ok(Whitespace), 100..101),
                (Ok(FalseKeyword), 101..106),
                (Ok(Whitespace), 106..107),
                (Ok(IfKeyword), 107..109),
                (Ok(Whitespace), 109..110),
                (Ok(HintsKeyword), 110..115),
                (Ok(Whitespace), 115..116),
                (Ok(InKeyword), 116..118),
                (Ok(Whitespace), 118..119),
                (Ok(ImportKeyword), 119..125),
                (Ok(Whitespace), 125..126),
                (Ok(InputKeyword), 126..131),
                (Ok(Whitespace), 131..132),
                (Ok(MetaKeyword), 132..136),
                (Ok(Whitespace), 136..137),
                (Ok(NullKeyword), 137..141),
                (Ok(Whitespace), 141..142),
                (Ok(ObjectKeyword), 142..148),
                (Ok(Whitespace), 148..149),
                (Ok(OutputKeyword), 149..155),
                (Ok(Whitespace), 155..156),
                (Ok(ParameterMetaKeyword), 156..170),
                (Ok(Whitespace), 170..171),
                (Ok(RequirementsKeyword), 171..183),
                (Ok(Whitespace), 183..184),
                (Ok(RuntimeKeyword), 184..191),
                (Ok(Whitespace), 191..192),
                (Ok(ScatterKeyword), 192..199),
                (Ok(Whitespace), 199..200),
                (Ok(StructKeyword), 200..206),
                (Ok(Whitespace), 206..207),
                (Ok(TaskKeyword), 207..211),
                (Ok(Whitespace), 211..212),
                (Ok(ThenKeyword), 212..216),
                (Ok(Whitespace), 216..217),
                (Ok(TrueKeyword), 217..221),
                (Ok(Whitespace), 221..222),
                (Ok(VersionKeyword), 222..229),
                (Ok(Whitespace), 229..230),
                (Ok(WorkflowKeyword), 230..238),
                (Ok(Whitespace), 238..239),
                (Ok(EnvKeyword), 239..242),
            ],
        );
    }

    /// Every symbol token, including maximal-munch cases (`||`, `&&`,
    /// `==`, `!=`, `<=`, `>=`, `**`).
    #[test]
    fn symbols() {
        use Token::*;

        let lexer = Lexer::<Token>::new(r#"{}[]=:,()?!+-||&&*/%==!=<=>=<>.**"#);
        let tokens: Vec<_> = lexer.map(map).collect();
        assert_eq!(
            tokens,
            &[
                (Ok(OpenBrace), 0..1),
                (Ok(CloseBrace), 1..2),
                (Ok(OpenBracket), 2..3),
                (Ok(CloseBracket), 3..4),
                (Ok(Assignment), 4..5),
                (Ok(Colon), 5..6),
                (Ok(Comma), 6..7),
                (Ok(OpenParen), 7..8),
                (Ok(CloseParen), 8..9),
                (Ok(QuestionMark), 9..10),
                (Ok(Exclamation), 10..11),
                (Ok(Plus), 11..12),
                (Ok(Minus), 12..13),
                (Ok(LogicalOr), 13..15),
                (Ok(LogicalAnd), 15..17),
                (Ok(Asterisk), 17..18),
                (Ok(Slash), 18..19),
                (Ok(Percent), 19..20),
                (Ok(Equal), 20..22),
                (Ok(NotEqual), 22..24),
                (Ok(LessEqual), 24..26),
                (Ok(GreaterEqual), 26..28),
                (Ok(Less), 28..29),
                (Ok(Greater), 29..30),
                (Ok(Dot), 30..31),
                (Ok(Exponentiation), 31..33),
            ],
        );
    }
}