1use {super::*, CompileErrorKind::*, TokenKind::*};
2
/// Splits a justfile source string into a stream of `Token`s.
pub(crate) struct Lexer<'src> {
  /// Iterator over the characters of `src` not yet consumed
  chars: Chars<'src>,
  /// Stack of leading-whitespace strings, one per indentation level;
  /// the bottom entry is always the empty string
  indentation: Vec<&'src str>,
  /// `InterpolationStart` tokens for interpolations that are still open
  interpolation_stack: Vec<Token<'src>>,
  /// Next character to be lexed, if any
  next: Option<char>,
  /// Currently open delimiters and the lines on which they were opened
  open_delimiters: Vec<(Delimiter, usize)>,
  /// Path of the file being lexed
  path: &'src Path,
  /// True while lexing the text of a recipe body
  recipe_body: bool,
  /// Set when a recipe header's `:` is seen; the next indent starts a body
  recipe_body_pending: bool,
  /// Source text being lexed
  src: &'src str,
  /// Position just past the end of the token currently being lexed
  token_end: Position,
  /// Position of the start of the token currently being lexed
  token_start: Position,
  /// Tokens lexed so far
  tokens: Vec<Token<'src>>,
}
37
38impl<'src> Lexer<'src> {
39 pub(crate) const INTERPOLATION_END: &'static str = "}}";
40 pub(crate) const INTERPOLATION_ESCAPE: &'static str = "{{{{";
41 pub(crate) const INTERPOLATION_START: &'static str = "{{";
42
43 pub(crate) fn lex(path: &'src Path, src: &'src str) -> CompileResult<'src, Vec<Token<'src>>> {
45 Self::new(path, src).tokenize()
46 }
47
48 #[cfg(test)]
49 pub(crate) fn test_lex(src: &'src str) -> CompileResult<'src, Vec<Token<'src>>> {
50 Self::new("justfile".as_ref(), src).tokenize()
51 }
52
53 fn new(path: &'src Path, src: &'src str) -> Self {
55 let mut chars = src.chars();
56 let next = chars.next();
57
58 let start = Position {
59 offset: 0,
60 column: 0,
61 line: 0,
62 };
63
64 Self {
65 indentation: vec![""],
66 tokens: Vec::new(),
67 token_start: start,
68 token_end: start,
69 recipe_body_pending: false,
70 recipe_body: false,
71 interpolation_stack: Vec::new(),
72 open_delimiters: Vec::new(),
73 chars,
74 next,
75 src,
76 path,
77 }
78 }
79
80 fn advance(&mut self) -> CompileResult<'src> {
83 match self.next {
84 Some(c) => {
85 let len_utf8 = c.len_utf8();
86
87 self.token_end.offset += len_utf8;
88 self.token_end.column += len_utf8;
89
90 if c == '\n' {
91 self.token_end.column = 0;
92 self.token_end.line += 1;
93 }
94
95 self.next = self.chars.next();
96
97 Ok(())
98 }
99 None => Err(self.internal_error("Lexer advanced past end of text")),
100 }
101 }
102
  /// Source text of the token currently being lexed.
  fn lexeme(&self) -> &'src str {
    &self.src[self.token_start.offset..self.token_end.offset]
  }
107
  /// Length in bytes of the token currently being lexed.
  fn current_token_length(&self) -> usize {
    self.token_end.offset - self.token_start.offset
  }
112
113 fn accepted(&mut self, c: char) -> CompileResult<'src, bool> {
114 if self.next_is(c) {
115 self.advance()?;
116 Ok(true)
117 } else {
118 Ok(false)
119 }
120 }
121
122 fn presume(&mut self, c: char) -> CompileResult<'src> {
123 if !self.next_is(c) {
124 return Err(self.internal_error(format!("Lexer presumed character `{c}`")));
125 }
126
127 self.advance()?;
128
129 Ok(())
130 }
131
132 fn presume_str(&mut self, s: &str) -> CompileResult<'src> {
133 for c in s.chars() {
134 self.presume(c)?;
135 }
136
137 Ok(())
138 }
139
  /// True if the next character is `c`.
  fn next_is(&self, c: char) -> bool {
    self.next == Some(c)
  }
144
145 fn next_is_whitespace(&self) -> bool {
147 self.next_is(' ') || self.next_is('\t')
148 }
149
  /// Un-lexed text remaining after the current token.
  fn rest(&self) -> &'src str {
    &self.src[self.token_end.offset..]
  }
154
  /// True if the remaining un-lexed text starts with `prefix`.
  fn rest_starts_with(&self, prefix: &str) -> bool {
    self.rest().starts_with(prefix)
  }
159
  /// True if the lexer is at a line ending, either `\n` or `\r\n`.
  fn at_eol(&self) -> bool {
    self.next_is('\n') || self.rest_starts_with("\r\n")
  }
164
  /// True if there is no un-lexed text remaining.
  fn at_eof(&self) -> bool {
    self.rest().is_empty()
  }
169
  /// True if the lexer is at a line ending or at end of file.
  fn at_eol_or_eof(&self) -> bool {
    self.at_eol() || self.at_eof()
  }
174
  /// Leading whitespace of the current (innermost) indentation level.
  fn indentation(&self) -> &'src str {
    self.indentation.last().unwrap()
  }
179
  /// True if the lexer is currently inside an indented block.
  fn indented(&self) -> bool {
    !self.indentation().is_empty()
  }
184
  /// Complete the token currently being lexed, giving it `kind`, and
  /// begin the next token at the current end position.
  fn token(&mut self, kind: TokenKind) {
    self.tokens.push(Token {
      column: self.token_start.column,
      kind,
      length: self.token_end.offset - self.token_start.offset,
      line: self.token_start.line,
      offset: self.token_start.offset,
      path: self.path,
      src: self.src,
    });

    // The next token starts where this one ended.
    self.token_start = self.token_end;
  }
201
202 fn internal_error(&self, message: impl Into<String>) -> CompileError<'src> {
204 let token = Token {
206 src: self.src,
207 offset: self.token_end.offset,
208 line: self.token_end.line,
209 column: self.token_end.column,
210 length: 0,
211 kind: Unspecified,
212 path: self.path,
213 };
214 CompileError::new(
215 token,
216 Internal {
217 message: message.into(),
218 },
219 )
220 }
221
  /// Create a compile error of `kind` spanning the current token.
  ///
  /// For unterminated strings and backticks the error span covers only
  /// the opening delimiter, not the whole partial token.
  fn error(&self, kind: CompileErrorKind<'src>) -> CompileError<'src> {
    // Width of the error token
    let length = match kind {
      UnterminatedString | UnterminatedBacktick => {
        let Some(kind) = StringKind::from_token_start(self.lexeme()) else {
          return self.internal_error("Lexer::error: expected string or backtick token start");
        };
        kind.delimiter().len()
      }
      _ => self.lexeme().len(),
    };

    let token = Token {
      kind: Unspecified,
      src: self.src,
      offset: self.token_start.offset,
      line: self.token_start.line,
      column: self.token_start.column,
      length,
      path: self.path,
    };

    CompileError::new(token, kind)
  }
250
  /// Create an unterminated-interpolation error anchored to the
  /// `InterpolationStart` token that opened the interpolation.
  fn unterminated_interpolation_error(interpolation_start: Token<'src>) -> CompileError<'src> {
    CompileError::new(interpolation_start, UnterminatedInterpolation)
  }
254
255 pub(crate) fn is_identifier_start(c: char) -> bool {
257 matches!(c, 'a'..='z' | 'A'..='Z' | '_')
258 }
259
260 pub(crate) fn is_identifier_continue(c: char) -> bool {
262 Self::is_identifier_start(c) || matches!(c, '0'..='9' | '-')
263 }
264
  /// Main lexer loop: consume all of `src`, unwind any remaining
  /// indentation with dedents, and finish with an `Eof` token.
  fn tokenize(mut self) -> CompileResult<'src, Vec<Token<'src>>> {
    loop {
      // At the start of a line, handle blank lines and indentation.
      if self.token_start.column == 0 {
        self.lex_line_start()?;
      }

      match self.next {
        Some(first) => {
          // Dispatch on context: innermost open interpolation first,
          // then recipe body text, otherwise normal tokens.
          if let Some(&interpolation_start) = self.interpolation_stack.last() {
            self.lex_interpolation(interpolation_start, first)?;
          } else if self.recipe_body {
            self.lex_body()?;
          } else {
            self.lex_normal(first)?;
          }
        }
        None => break,
      }
    }

    // An interpolation still open at end of input is an error.
    if let Some(&interpolation_start) = self.interpolation_stack.last() {
      return Err(Self::unterminated_interpolation_error(interpolation_start));
    }

    // Emit a dedent for each remaining indentation level.
    while self.indented() {
      self.lex_dedent();
    }

    self.token(Eof);

    // Invariants: all input consumed and indentation fully unwound.
    assert_eq!(self.token_start.offset, self.token_end.offset);
    assert_eq!(self.token_start.offset, self.src.len());
    assert_eq!(self.indentation.len(), 1);

    Ok(self.tokens)
  }
302
303 fn lex_line_start(&mut self) -> CompileResult<'src> {
305 enum Indentation<'src> {
306 Blank,
308 Continue,
310 Decrease,
312 Inconsistent,
314 Increase,
316 Mixed { whitespace: &'src str },
318 }
319
320 use Indentation::*;
321
322 let nonblank_index = self
323 .rest()
324 .char_indices()
325 .skip_while(|&(_, c)| c == ' ' || c == '\t')
326 .map(|(i, _)| i)
327 .next()
328 .unwrap_or_else(|| self.rest().len());
329
330 let rest = &self.rest()[nonblank_index..];
331
332 let whitespace = &self.rest()[..nonblank_index];
333
334 if self.open_delimiters_or_interpolation() {
335 if !whitespace.is_empty() {
336 while self.next_is_whitespace() {
337 self.advance()?;
338 }
339
340 self.token(Whitespace);
341 }
342
343 return Ok(());
344 }
345
346 let body_whitespace = &whitespace[..whitespace
347 .char_indices()
348 .take(self.indentation().chars().count())
349 .map(|(i, _c)| i)
350 .next()
351 .unwrap_or(0)];
352
353 let spaces = whitespace.chars().any(|c| c == ' ');
354 let tabs = whitespace.chars().any(|c| c == '\t');
355
356 let body_spaces = body_whitespace.chars().any(|c| c == ' ');
357 let body_tabs = body_whitespace.chars().any(|c| c == '\t');
358
359 #[allow(clippy::if_same_then_else)]
360 let indentation = if rest.starts_with('\n') || rest.starts_with("\r\n") || rest.is_empty() {
361 Blank
362 } else if whitespace == self.indentation() {
363 Continue
364 } else if self.indentation.contains(&whitespace) {
365 Decrease
366 } else if self.recipe_body && whitespace.starts_with(self.indentation()) {
367 Continue
368 } else if self.recipe_body && body_spaces && body_tabs {
369 Mixed {
370 whitespace: body_whitespace,
371 }
372 } else if !self.recipe_body && spaces && tabs {
373 Mixed { whitespace }
374 } else if whitespace.len() < self.indentation().len() {
375 Inconsistent
376 } else if self.recipe_body
377 && body_whitespace.len() >= self.indentation().len()
378 && !body_whitespace.starts_with(self.indentation())
379 {
380 Inconsistent
381 } else if whitespace.len() >= self.indentation().len()
382 && !whitespace.starts_with(self.indentation())
383 {
384 Inconsistent
385 } else {
386 Increase
387 };
388
389 match indentation {
390 Blank => {
391 if !whitespace.is_empty() {
392 while self.next_is_whitespace() {
393 self.advance()?;
394 }
395
396 self.token(Whitespace);
397 }
398
399 Ok(())
400 }
401 Continue => {
402 if !self.indentation().is_empty() {
403 for _ in self.indentation().chars() {
404 self.advance()?;
405 }
406
407 self.token(Whitespace);
408 }
409
410 Ok(())
411 }
412 Decrease => {
413 while self.indentation() != whitespace {
414 self.lex_dedent();
415 }
416
417 if !whitespace.is_empty() {
418 while self.next_is_whitespace() {
419 self.advance()?;
420 }
421
422 self.token(Whitespace);
423 }
424
425 Ok(())
426 }
427 Mixed { whitespace } => {
428 for _ in whitespace.chars() {
429 self.advance()?;
430 }
431
432 Err(self.error(MixedLeadingWhitespace { whitespace }))
433 }
434 Inconsistent => {
435 for _ in whitespace.chars() {
436 self.advance()?;
437 }
438
439 Err(self.error(InconsistentLeadingWhitespace {
440 expected: self.indentation(),
441 found: whitespace,
442 }))
443 }
444 Increase => {
445 while self.next_is_whitespace() {
446 self.advance()?;
447 }
448
449 let indentation = self.lexeme();
450 self.indentation.push(indentation);
451 self.token(Indent);
452 if self.recipe_body_pending {
453 self.recipe_body = true;
454 }
455
456 Ok(())
457 }
458 }
459 }
460
  /// Lex a token in the normal context (outside recipe bodies and
  /// interpolations), dispatching on the first character.
  fn lex_normal(&mut self, start: char) -> CompileResult<'src> {
    match start {
      ' ' | '\t' => self.lex_whitespace(),
      // `!include` gets a dedicated error.
      '!' if self.rest().starts_with("!include") => Err(self.error(Include)),
      '!' => self.lex_choices('!', &[('=', BangEquals), ('~', BangTilde)], None),
      '#' => self.lex_comment(),
      '$' => self.lex_single(Dollar),
      '&' => self.lex_digraph('&', '&', AmpersandAmpersand),
      '(' => self.lex_delimiter(ParenL),
      ')' => self.lex_delimiter(ParenR),
      '*' => self.lex_single(Asterisk),
      '+' => self.lex_single(Plus),
      ',' => self.lex_single(Comma),
      '/' => self.lex_single(Slash),
      ':' => self.lex_colon(),
      '=' => self.lex_choices(
        '=',
        &[('=', EqualsEquals), ('~', EqualsTilde)],
        Some(Equals),
      ),
      '?' => self.lex_single(QuestionMark),
      '@' => self.lex_single(At),
      '[' => self.lex_delimiter(BracketL),
      '\\' => self.lex_escape(),
      '\n' | '\r' => self.lex_eol(),
      '\u{feff}' => self.lex_single(ByteOrderMark),
      ']' => self.lex_delimiter(BracketR),
      '`' | '"' | '\'' => self.lex_string(None),
      '{' => self.lex_delimiter(BraceL),
      '|' => self.lex_digraph('|', '|', BarBar),
      '}' => {
        // A `}}` whose innermost open delimiter is a format string
        // resumes that format string rather than closing a brace.
        let format_string_kind = self.open_delimiters.last().and_then(|(delimiter, _line)| {
          if !self.rest().starts_with(Self::INTERPOLATION_END) {
            None
          } else if let Delimiter::FormatString(kind) = delimiter {
            Some(kind)
          } else {
            None
          }
        });

        if let Some(format_string_kind) = format_string_kind {
          self.lex_string(Some(*format_string_kind))
        } else {
          self.lex_delimiter(BraceR)
        }
      }
      _ if Self::is_identifier_start(start) => self.lex_identifier(),
      _ => {
        // Consume the unknown character so the error spans it.
        self.advance()?;
        Err(self.error(UnknownStartOfToken { start }))
      }
    }
  }
516
  /// Lex a token inside a `{{…}}` interpolation, closing it on `}}`.
  fn lex_interpolation(
    &mut self,
    interpolation_start: Token<'src>,
    start: char,
  ) -> CompileResult<'src> {
    if self.rest_starts_with(Self::INTERPOLATION_END) && self.open_delimiters.is_empty() {
      // Found the closing `}}`; pop the matching interpolation.
      if self.interpolation_stack.pop().is_none() {
        self.presume_str(Self::INTERPOLATION_END)?;
        return Err(self.internal_error(
          "Lexer::lex_interpolation found `}}` but was called with empty interpolation stack.",
        ));
      }
      self.lex_double(InterpolationEnd)
    } else if self.at_eof() && self.open_delimiters.is_empty() {
      // Input ended before the interpolation was closed.
      Err(Self::unterminated_interpolation_error(interpolation_start))
    } else {
      // Otherwise lex as in the normal context.
      self.lex_normal(start)
    }
  }
542
  /// Lex recipe body text, stopping at a line ending, at `{{` (the start
  /// of an interpolation), or at end of file.
  fn lex_body(&mut self) -> CompileResult<'src> {
    /// What ended the run of body text.
    enum Terminator {
      EndOfFile,
      Interpolation,
      Newline,
      NewlineCarriageReturn,
    }

    use Terminator::*;

    let terminator = loop {
      // `{{{{` is an escaped `{{`, consumed as literal text.
      if self.rest_starts_with(Self::INTERPOLATION_ESCAPE) {
        self.presume_str(Self::INTERPOLATION_ESCAPE)?;
        continue;
      }

      if self.rest_starts_with("\n") {
        break Newline;
      }

      if self.rest_starts_with("\r\n") {
        break NewlineCarriageReturn;
      }

      if self.rest_starts_with(Self::INTERPOLATION_START) {
        break Interpolation;
      }

      if self.at_eof() {
        break EndOfFile;
      }

      self.advance()?;
    };

    // Emit any text accumulated before the terminator.
    if self.current_token_length() > 0 {
      self.token(Text);
    }

    match terminator {
      Newline => self.lex_single(Eol),
      NewlineCarriageReturn => self.lex_double(Eol),
      Interpolation => {
        self.lex_double(InterpolationStart)?;
        // Remember the `InterpolationStart` token so an unterminated
        // interpolation can be reported at its opening.
        self
          .interpolation_stack
          .push(self.tokens[self.tokens.len() - 1]);
        Ok(())
      }
      EndOfFile => Ok(()),
    }
  }
597
  /// Emit a zero-length `Dedent` token, pop one indentation level, and
  /// leave any recipe-body state.
  fn lex_dedent(&mut self) {
    assert_eq!(self.current_token_length(), 0);
    self.token(Dedent);
    self.indentation.pop();
    self.recipe_body_pending = false;
    self.recipe_body = false;
  }
605
  /// Consume the next character and emit a single-character token of `kind`.
  fn lex_single(&mut self, kind: TokenKind) -> CompileResult<'src> {
    self.advance()?;
    self.token(kind);
    Ok(())
  }
612
613 fn lex_double(&mut self, kind: TokenKind) -> CompileResult<'src> {
615 self.advance()?;
616 self.advance()?;
617 self.token(kind);
618 Ok(())
619 }
620
  /// Lex a token beginning with `first`, which may be followed by one of
  /// the characters in `choices` to form a two-character token.
  /// Otherwise, emit `otherwise` if given, or report an error.
  fn lex_choices(
    &mut self,
    first: char,
    choices: &[(char, TokenKind)],
    otherwise: Option<TokenKind>,
  ) -> CompileResult<'src> {
    self.presume(first)?;

    for (second, then) in choices {
      if self.accepted(*second)? {
        self.token(*then);
        return Ok(());
      }
    }

    if let Some(token) = otherwise {
      self.token(token);
    } else {
      // Emit an unspecified token to consume the first character…
      self.token(Unspecified);

      let expected = choices.iter().map(|choice| choice.0).collect();

      if self.at_eof() {
        return Err(self.error(UnexpectedEndOfToken { expected }));
      }

      // …and advance past the unexpected character before erroring.
      self.advance()?;

      return Err(self.error(UnexpectedCharacter { expected }));
    }

    Ok(())
  }
660
  /// Lex an opening or closing delimiter, pushing or popping it on the
  /// delimiter stack so mismatches can be reported.
  fn lex_delimiter(&mut self, kind: TokenKind) -> CompileResult<'src> {
    match kind {
      BraceL => self.open_delimiter(Delimiter::Brace),
      BraceR => self.close_delimiter(Delimiter::Brace)?,
      BracketL => self.open_delimiter(Delimiter::Bracket),
      BracketR => self.close_delimiter(Delimiter::Bracket)?,
      ParenL => self.open_delimiter(Delimiter::Paren),
      ParenR => self.close_delimiter(Delimiter::Paren)?,
      _ => {
        // Calling this with a non-delimiter token is a lexer bug.
        return Err(self.internal_error(format!(
          "Lexer::lex_delimiter called with non-delimiter token: `{kind}`",
        )));
      }
    }

    // Emit the delimiter token itself.
    self.lex_single(kind)?;

    Ok(())
  }
682
683 fn open_delimiter(&mut self, delimiter: Delimiter) {
685 self
686 .open_delimiters
687 .push((delimiter, self.token_start.line));
688 }
689
  /// Pop the innermost open delimiter, failing if it does not match
  /// `close` or if no delimiter is open at all.
  fn close_delimiter(&mut self, close: Delimiter) -> CompileResult<'src> {
    match self.open_delimiters.pop() {
      Some((open, _)) if open == close => Ok(()),
      Some((open, open_line)) => Err(self.error(MismatchedClosingDelimiter {
        open,
        close,
        open_line,
      })),
      None => Err(self.error(UnexpectedClosingDelimiter { close })),
    }
  }
702
  /// True if any delimiter or interpolation is currently open.
  fn open_delimiters_or_interpolation(&self) -> bool {
    !self.open_delimiters.is_empty() || !self.interpolation_stack.is_empty()
  }
707
  /// Lex a two-character token consisting of `left` then `right`,
  /// e.g. `&&` or `||`.
  fn lex_digraph(&mut self, left: char, right: char, token: TokenKind) -> CompileResult<'src> {
    self.presume(left)?;

    if self.accepted(right)? {
      self.token(token);
      Ok(())
    } else {
      // Emit an unspecified token to consume the first character…
      self.token(Unspecified);

      if self.at_eof() {
        return Err(self.error(UnexpectedEndOfToken {
          expected: vec![right],
        }));
      }

      // …and advance past the unexpected character before erroring.
      self.advance()?;

      Err(self.error(UnexpectedCharacter {
        expected: vec![right],
      }))
    }
  }
734
735 fn lex_colon(&mut self) -> CompileResult<'src> {
737 self.presume(':')?;
738
739 if self.accepted('=')? {
740 self.token(ColonEquals);
741 } else if self.accepted(':')? {
742 self.token(ColonColon);
743 } else {
744 self.token(Colon);
745 self.recipe_body_pending = true;
746 }
747
748 Ok(())
749 }
750
751 fn lex_escape(&mut self) -> CompileResult<'src> {
753 self.presume('\\')?;
754
755 if self.accepted('\n')? {
757 while self.next_is_whitespace() {
758 self.advance()?;
759 }
760 self.token(Whitespace);
761 } else if self.accepted('\r')? {
762 if !self.accepted('\n')? {
763 return Err(self.error(UnpairedCarriageReturn));
764 }
765 while self.next_is_whitespace() {
766 self.advance()?;
767 }
768 self.token(Whitespace);
769 } else if let Some(character) = self.next {
770 return Err(self.error(InvalidEscapeSequence { character }));
771 }
772
773 Ok(())
774 }
775
776 fn lex_eol(&mut self) -> CompileResult<'src> {
778 if self.accepted('\r')? {
779 if !self.accepted('\n')? {
780 return Err(self.error(UnpairedCarriageReturn));
781 }
782 } else {
783 self.presume('\n')?;
784 }
785
786 if self.open_delimiters_or_interpolation() {
788 self.token(Whitespace);
789 } else {
790 self.token(Eol);
791 }
792
793 Ok(())
794 }
795
796 fn lex_identifier(&mut self) -> CompileResult<'src> {
798 self.advance()?;
799
800 while let Some(c) = self.next {
801 if !Self::is_identifier_continue(c) {
802 break;
803 }
804
805 self.advance()?;
806 }
807
808 self.token(Identifier);
809
810 Ok(())
811 }
812
  /// Lex a `#` comment running to the end of the line or file.
  fn lex_comment(&mut self) -> CompileResult<'src> {
    self.presume('#')?;

    while !self.at_eol_or_eof() {
      self.advance()?;
    }

    self.token(Comment);

    Ok(())
  }
825
  /// Lex a run of spaces and tabs into a single `Whitespace` token.
  fn lex_whitespace(&mut self) -> CompileResult<'src> {
    while self.next_is_whitespace() {
      self.advance()?;
    }

    self.token(Whitespace);

    Ok(())
  }
836
  /// Lex a backtick, cooked string, raw string, or format-string segment.
  ///
  /// When `format_string_kind` is `Some`, the lexer is positioned at the
  /// `}}` that resumes a format string of that kind; otherwise it is at
  /// an opening string delimiter.
  fn lex_string(&mut self, format_string_kind: Option<StringKind>) -> CompileResult<'src> {
    // A string is a format string if it resumes one, or if the previous
    // token is the `f` keyword.
    let format = format_string_kind.is_some()
      || self.tokens.last().is_some_and(|token| {
        token.kind == TokenKind::Identifier && token.lexeme() == Keyword::F.lexeme()
      });

    let kind = if let Some(kind) = format_string_kind {
      self.presume_str(Self::INTERPOLATION_END)?;
      kind
    } else {
      let Some(kind) = StringKind::from_token_start(self.rest()) else {
        self.advance()?;
        return Err(self.internal_error("Lexer::lex_string: invalid string start"));
      };
      self.presume_str(kind.delimiter())?;
      kind
    };

    // True when the previous character was an unprocessed escape `\`.
    let mut escape = false;

    loop {
      if self.next.is_none() {
        return Err(self.error(kind.unterminated_error_kind()));
      } else if !escape && kind.processes_escape_sequences() && self.next_is('\\') {
        escape = true;
      } else if escape && kind.processes_escape_sequences() && self.next_is('u') {
        // `\u`: clear the escape flag and continue scanning.
        escape = false;
      } else if format && self.rest_starts_with(Self::INTERPOLATION_ESCAPE) {
        // `{{{{` is an escaped `{{` inside a format string; consume
        // three characters here, plus one at the bottom of the loop.
        escape = false;
        self.advance()?;
        self.advance()?;
        self.advance()?;
      } else if !escape
        && (self.rest_starts_with(kind.delimiter())
          || format && self.rest_starts_with(Self::INTERPOLATION_START))
      {
        break;
      } else {
        escape = false;
      }

      self.advance()?;
    }

    if format && self.rest_starts_with(Self::INTERPOLATION_START) {
      // The string is interrupted by an interpolation.
      self.presume_str(Self::INTERPOLATION_START)?;
      if format_string_kind.is_some() {
        self.token(FormatStringContinue);
      } else {
        self.token(FormatStringStart);
        self.open_delimiter(Delimiter::FormatString(kind));
      }
    } else {
      // The string runs to its closing delimiter.
      self.presume_str(kind.delimiter())?;

      if let Some(format_string_kind) = format_string_kind {
        self.close_delimiter(Delimiter::FormatString(format_string_kind))?;
        self.token(FormatStringEnd);
      } else {
        self.token(kind.token_kind());
      }
    }

    Ok(())
  }
907}
908
909#[cfg(test)]
910mod tests {
911 use super::*;
912
913 use pretty_assertions::assert_eq;
914
  /// Constructs a `#[test]` that lexes `text` and compares the resulting
  /// token kinds and lexemes against `tokens`. An `Eof` token is appended
  /// automatically, and `text` is unindented unless `unindent: false` is
  /// given. Lexemes omitted from `tokens` default via `default_lexeme`.
  macro_rules! test {
    {
      name: $name:ident,
      text: $text:expr,
      tokens: ($($kind:ident $(: $lexeme:literal)?),* $(,)?)$(,)?
    } => {
      #[test]
      fn $name() {
        let kinds: &[TokenKind] = &[$($kind,)* Eof];

        let lexemes: &[&str] = &[$(lexeme!($kind $(, $lexeme)?),)* ""];

        test($text, true, kinds, lexemes);
      }
    };
    {
      name: $name:ident,
      text: $text:expr,
      tokens: ($($kind:ident $(: $lexeme:literal)?),* $(,)?)$(,)?
      unindent: $unindent:expr,
    } => {
      #[test]
      fn $name() {
        let kinds: &[TokenKind] = &[$($kind,)* Eof];

        let lexemes: &[&str] = &[$(lexeme!($kind $(, $lexeme)?),)* ""];

        test($text, $unindent, kinds, lexemes);
      }
    }
  }
946
  /// Expands to an explicit lexeme when one was provided, otherwise to
  /// the default lexeme for the given token kind.
  macro_rules! lexeme {
    {
      $kind:ident, $lexeme:literal
    } => {
      $lexeme
    };
    {
      $kind:ident
    } => {
      default_lexeme($kind)
    }
  }
959
  /// Lex `text` (optionally unindenting it first) and check that the
  /// resulting kinds and lexemes match expectations, that the lexemes
  /// round-trip to the input, and that every token's position metadata
  /// is self-consistent.
  #[track_caller]
  fn test(text: &str, unindent_text: bool, want_kinds: &[TokenKind], want_lexemes: &[&str]) {
    let text = if unindent_text {
      unindent(text)
    } else {
      text.to_owned()
    };

    let have = Lexer::test_lex(&text).unwrap();

    let have_kinds = have
      .iter()
      .map(|token| token.kind)
      .collect::<Vec<TokenKind>>();

    let have_lexemes = have.iter().map(Token::lexeme).collect::<Vec<&str>>();

    assert_eq!(have_kinds, want_kinds, "Token kind mismatch");
    assert_eq!(have_lexemes, want_lexemes, "Token lexeme mismatch");

    // Concatenated lexemes must reproduce the input exactly.
    let mut roundtrip = String::new();

    for lexeme in have_lexemes {
      roundtrip.push_str(lexeme);
    }

    assert_eq!(roundtrip, text, "Roundtrip mismatch");

    // Walk the tokens, recomputing offset/line/column from the lexemes
    // and checking them against each token's recorded position.
    let mut offset = 0;
    let mut line = 0;
    let mut column = 0;

    for token in have {
      assert_eq!(token.offset, offset);
      assert_eq!(token.line, line);
      assert_eq!(token.lexeme().len(), token.length);
      assert_eq!(token.column, column);

      for c in token.lexeme().chars() {
        if c == '\n' {
          line += 1;
          column = 0;
        } else {
          column += c.len_utf8();
        }
      }

      offset += token.length;
    }
  }
1010
  /// The canonical lexeme for token kinds whose text is fixed; panics
  /// for kinds whose lexeme must be spelled out in the test.
  fn default_lexeme(kind: TokenKind) -> &'static str {
    match kind {
      AmpersandAmpersand => "&&",
      Asterisk => "*",
      At => "@",
      BangEquals => "!=",
      BangTilde => "!~",
      BarBar => "||",
      BraceL => "{",
      BraceR => "}",
      BracketL => "[",
      BracketR => "]",
      ByteOrderMark => "\u{feff}",
      Colon => ":",
      ColonColon => "::",
      ColonEquals => ":=",
      Comma => ",",
      Dollar => "$",
      Eol => "\n",
      Equals => "=",
      EqualsEquals => "==",
      EqualsTilde => "=~",
      Indent => "  ",
      InterpolationEnd => "}}",
      InterpolationStart => "{{",
      ParenL => "(",
      ParenR => ")",
      Plus => "+",
      QuestionMark => "?",
      Slash => "/",
      Whitespace => " ",

      // Zero-length tokens
      Dedent | Eof => "",

      // Kinds with variable text must always specify a lexeme.
      Backtick | Comment | FormatStringContinue | FormatStringEnd | FormatStringStart
      | Identifier | StringToken | Text | Unspecified => {
        panic!("Token {kind:?} has no default lexeme")
      }
    }
  }
1054
  /// Constructs a `#[test]` that lexes `input` and checks that lexing
  /// fails with the given error kind at the given position and width.
  macro_rules! error {
    (
      name: $name:ident,
      input: $input:expr,
      offset: $offset:expr,
      line: $line:expr,
      column: $column:expr,
      width: $width:expr,
      kind: $kind:expr,
    ) => {
      #[test]
      fn $name() {
        error($input, $offset, $line, $column, $width, $kind);
      }
    };
  }
1071
1072 #[track_caller]
1073 fn error(
1074 src: &str,
1075 offset: usize,
1076 line: usize,
1077 column: usize,
1078 length: usize,
1079 kind: CompileErrorKind,
1080 ) {
1081 match Lexer::test_lex(src) {
1082 Ok(_) => panic!("Lexing succeeded but expected"),
1083 Err(have) => {
1084 let want = CompileError {
1085 token: Token {
1086 kind: have.token.kind,
1087 src,
1088 offset,
1089 line,
1090 column,
1091 length,
1092 path: "justfile".as_ref(),
1093 },
1094 kind: kind.into(),
1095 };
1096 assert_eq!(have, want);
1097 }
1098 }
1099 }
1100
1101 test! {
1102 name: name_new,
1103 text: "foo",
1104 tokens: (Identifier:"foo"),
1105 }
1106
1107 test! {
1108 name: comment,
1109 text: "# hello",
1110 tokens: (Comment:"# hello"),
1111 }
1112
1113 test! {
1114 name: backtick,
1115 text: "`echo`",
1116 tokens: (Backtick:"`echo`"),
1117 }
1118
1119 test! {
1120 name: backtick_multi_line,
1121 text: "`echo\necho`",
1122 tokens: (Backtick:"`echo\necho`"),
1123 }
1124
1125 test! {
1126 name: raw_string,
1127 text: "'hello'",
1128 tokens: (StringToken:"'hello'"),
1129 }
1130
1131 test! {
1132 name: raw_string_multi_line,
1133 text: "'hello\ngoodbye'",
1134 tokens: (StringToken:"'hello\ngoodbye'"),
1135 }
1136
1137 test! {
1138 name: cooked_string,
1139 text: "\"hello\"",
1140 tokens: (StringToken:"\"hello\""),
1141 }
1142
1143 test! {
1144 name: cooked_string_multi_line,
1145 text: "\"hello\ngoodbye\"",
1146 tokens: (StringToken:"\"hello\ngoodbye\""),
1147 }
1148
1149 test! {
1150 name: cooked_multiline_string,
1151 text: "\"\"\"hello\ngoodbye\"\"\"",
1152 tokens: (StringToken:"\"\"\"hello\ngoodbye\"\"\""),
1153 }
1154
1155 test! {
1156 name: ampersand_ampersand,
1157 text: "&&",
1158 tokens: (AmpersandAmpersand),
1159 }
1160
1161 test! {
1162 name: equals,
1163 text: "=",
1164 tokens: (Equals),
1165 }
1166
1167 test! {
1168 name: equals_equals,
1169 text: "==",
1170 tokens: (EqualsEquals),
1171 }
1172
1173 test! {
1174 name: bang_equals,
1175 text: "!=",
1176 tokens: (BangEquals),
1177 }
1178
1179 test! {
1180 name: brace_l,
1181 text: "{",
1182 tokens: (BraceL),
1183 }
1184
1185 test! {
1186 name: brace_r,
1187 text: "{}",
1188 tokens: (BraceL, BraceR),
1189 }
1190
1191 test! {
1192 name: brace_lll,
1193 text: "{{{",
1194 tokens: (BraceL, BraceL, BraceL),
1195 }
1196
1197 test! {
1198 name: brace_rrr,
1199 text: "{{{}}}",
1200 tokens: (BraceL, BraceL, BraceL, BraceR, BraceR, BraceR),
1201 }
1202
1203 test! {
1204 name: dollar,
1205 text: "$",
1206 tokens: (Dollar),
1207 }
1208
1209 test! {
1210 name: export_concatenation,
1211 text: "export foo = 'foo' + 'bar'",
1212 tokens: (
1213 Identifier:"export",
1214 Whitespace,
1215 Identifier:"foo",
1216 Whitespace,
1217 Equals,
1218 Whitespace,
1219 StringToken:"'foo'",
1220 Whitespace,
1221 Plus,
1222 Whitespace,
1223 StringToken:"'bar'",
1224 )
1225 }
1226
1227 test! {
1228 name: export_complex,
1229 text: "export foo = ('foo' + 'bar') + `baz`",
1230 tokens: (
1231 Identifier:"export",
1232 Whitespace,
1233 Identifier:"foo",
1234 Whitespace,
1235 Equals,
1236 Whitespace,
1237 ParenL,
1238 StringToken:"'foo'",
1239 Whitespace,
1240 Plus,
1241 Whitespace,
1242 StringToken:"'bar'",
1243 ParenR,
1244 Whitespace,
1245 Plus,
1246 Whitespace,
1247 Backtick:"`baz`",
1248 ),
1249 }
1250
1251 test! {
1252 name: eol_linefeed,
1253 text: "\n",
1254 tokens: (Eol),
1255 unindent: false,
1256 }
1257
1258 test! {
1259 name: eol_carriage_return_linefeed,
1260 text: "\r\n",
1261 tokens: (Eol:"\r\n"),
1262 unindent: false,
1263 }
1264
1265 test! {
1266 name: indented_line,
1267 text: "foo:\n a",
1268 tokens: (Identifier:"foo", Colon, Eol, Indent:" ", Text:"a", Dedent),
1269 }
1270
1271 test! {
1272 name: indented_normal,
1273 text: "
1274 a
1275 b
1276 c
1277 ",
1278 tokens: (
1279 Identifier:"a",
1280 Eol,
1281 Indent:" ",
1282 Identifier:"b",
1283 Eol,
1284 Whitespace:" ",
1285 Identifier:"c",
1286 Eol,
1287 Dedent,
1288 ),
1289 }
1290
1291 test! {
1292 name: indented_normal_nonempty_blank,
1293 text: "a\n b\n\t\t\n c\n",
1294 tokens: (
1295 Identifier:"a",
1296 Eol,
1297 Indent:" ",
1298 Identifier:"b",
1299 Eol,
1300 Whitespace:"\t\t",
1301 Eol,
1302 Whitespace:" ",
1303 Identifier:"c",
1304 Eol,
1305 Dedent,
1306 ),
1307 unindent: false,
1308 }
1309
1310 test! {
1311 name: indented_normal_multiple,
1312 text: "
1313 a
1314 b
1315 c
1316 ",
1317 tokens: (
1318 Identifier:"a",
1319 Eol,
1320 Indent:" ",
1321 Identifier:"b",
1322 Eol,
1323 Indent:" ",
1324 Identifier:"c",
1325 Eol,
1326 Dedent,
1327 Dedent,
1328 ),
1329 }
1330
1331 test! {
1332 name: indent_indent_dedent_indent,
1333 text: "
1334 a
1335 b
1336 c
1337 d
1338 e
1339 ",
1340 tokens: (
1341 Identifier:"a",
1342 Eol,
1343 Indent:" ",
1344 Identifier:"b",
1345 Eol,
1346 Indent:" ",
1347 Identifier:"c",
1348 Eol,
1349 Dedent,
1350 Whitespace:" ",
1351 Identifier:"d",
1352 Eol,
1353 Indent:" ",
1354 Identifier:"e",
1355 Eol,
1356 Dedent,
1357 Dedent,
1358 ),
1359 }
1360
1361 test! {
1362 name: indent_recipe_dedent_indent,
1363 text: "
1364 a
1365 b:
1366 c
1367 d
1368 e
1369 ",
1370 tokens: (
1371 Identifier:"a",
1372 Eol,
1373 Indent:" ",
1374 Identifier:"b",
1375 Colon,
1376 Eol,
1377 Indent:" ",
1378 Text:"c",
1379 Eol,
1380 Dedent,
1381 Whitespace:" ",
1382 Identifier:"d",
1383 Eol,
1384 Indent:" ",
1385 Identifier:"e",
1386 Eol,
1387 Dedent,
1388 Dedent,
1389 ),
1390 }
1391
1392 test! {
1393 name: indented_block,
1394 text: "
1395 foo:
1396 a
1397 b
1398 c
1399 ",
1400 tokens: (
1401 Identifier:"foo",
1402 Colon,
1403 Eol,
1404 Indent,
1405 Text:"a",
1406 Eol,
1407 Whitespace:" ",
1408 Text:"b",
1409 Eol,
1410 Whitespace:" ",
1411 Text:"c",
1412 Eol,
1413 Dedent,
1414 )
1415 }
1416
1417 test! {
1418 name: brace_escape,
1419 text: "
1420 foo:
1421 {{{{
1422 ",
1423 tokens: (
1424 Identifier:"foo",
1425 Colon,
1426 Eol,
1427 Indent,
1428 Text:"{{{{",
1429 Eol,
1430 Dedent,
1431 )
1432 }
1433
1434 test! {
1435 name: indented_block_followed_by_item,
1436 text: "
1437 foo:
1438 a
1439 b:
1440 ",
1441 tokens: (
1442 Identifier:"foo",
1443 Colon,
1444 Eol,
1445 Indent,
1446 Text:"a",
1447 Eol,
1448 Dedent,
1449 Identifier:"b",
1450 Colon,
1451 Eol,
1452 )
1453 }
1454
1455 test! {
1456 name: indented_block_followed_by_blank,
1457 text: "
1458 foo:
1459 a
1460
1461 b:
1462 ",
1463 tokens: (
1464 Identifier:"foo",
1465 Colon,
1466 Eol,
1467 Indent:" ",
1468 Text:"a",
1469 Eol,
1470 Eol,
1471 Dedent,
1472 Identifier:"b",
1473 Colon,
1474 Eol,
1475 ),
1476 }
1477
1478 test! {
1479 name: indented_line_containing_unpaired_carriage_return,
1480 text: "foo:\n \r \n",
1481 tokens: (
1482 Identifier:"foo",
1483 Colon,
1484 Eol,
1485 Indent:" ",
1486 Text:"\r ",
1487 Eol,
1488 Dedent,
1489 ),
1490 unindent: false,
1491 }
1492
1493 test! {
1494 name: indented_blocks,
1495 text: "
1496 b: a
1497 @mv a b
1498
1499 a:
1500 @touch F
1501 @touch a
1502
1503 d: c
1504 @rm c
1505
1506 c: b
1507 @mv b c
1508 ",
1509 tokens: (
1510 Identifier:"b",
1511 Colon,
1512 Whitespace,
1513 Identifier:"a",
1514 Eol,
1515 Indent,
1516 Text:"@mv a b",
1517 Eol,
1518 Eol,
1519 Dedent,
1520 Identifier:"a",
1521 Colon,
1522 Eol,
1523 Indent,
1524 Text:"@touch F",
1525 Eol,
1526 Whitespace:" ",
1527 Text:"@touch a",
1528 Eol,
1529 Eol,
1530 Dedent,
1531 Identifier:"d",
1532 Colon,
1533 Whitespace,
1534 Identifier:"c",
1535 Eol,
1536 Indent,
1537 Text:"@rm c",
1538 Eol,
1539 Eol,
1540 Dedent,
1541 Identifier:"c",
1542 Colon,
1543 Whitespace,
1544 Identifier:"b",
1545 Eol,
1546 Indent,
1547 Text:"@mv b c",
1548 Eol,
1549 Dedent
1550 ),
1551 }
1552
1553 test! {
1554 name: interpolation_empty,
1555 text: "hello:\n echo {{}}",
1556 tokens: (
1557 Identifier:"hello",
1558 Colon,
1559 Eol,
1560 Indent:" ",
1561 Text:"echo ",
1562 InterpolationStart,
1563 InterpolationEnd,
1564 Dedent,
1565 ),
1566 }
1567
1568 test! {
1569 name: interpolation_expression,
1570 text: "hello:\n echo {{`echo hello` + `echo goodbye`}}",
1571 tokens: (
1572 Identifier:"hello",
1573 Colon,
1574 Eol,
1575 Indent:" ",
1576 Text:"echo ",
1577 InterpolationStart,
1578 Backtick:"`echo hello`",
1579 Whitespace,
1580 Plus,
1581 Whitespace,
1582 Backtick:"`echo goodbye`",
1583 InterpolationEnd,
1584 Dedent,
1585 ),
1586 }
1587
1588 test! {
1589 name: interpolation_raw_multiline_string,
1590 text: "hello:\n echo {{'\n'}}",
1591 tokens: (
1592 Identifier:"hello",
1593 Colon,
1594 Eol,
1595 Indent:" ",
1596 Text:"echo ",
1597 InterpolationStart,
1598 StringToken:"'\n'",
1599 InterpolationEnd,
1600 Dedent,
1601 ),
1602 }
1603
  // Identifiers may contain dashes, underscores, and digits after the first
  // character; each lexes as a single Identifier token.
1604   test! {
1605     name: tokenize_names,
1606     text: "
1607 foo
1608 bar-bob
1609 b-bob_asdfAAAA
1610 test123
1611 ",
1612     tokens: (
1613       Identifier:"foo",
1614       Eol,
1615       Identifier:"bar-bob",
1616       Eol,
1617       Identifier:"b-bob_asdfAAAA",
1618       Eol,
1619       Identifier:"test123",
1620       Eol,
1621     ),
1622   }
1623
  // A single indented line at end-of-input still gets a matching Dedent.
1624   test! {
1625     name: tokenize_indented_line,
1626     text: "foo:\n a",
1627     tokens: (
1628       Identifier:"foo",
1629       Colon,
1630       Eol,
1631       Indent:" ",
1632       Text:"a",
1633       Dedent,
1634     ),
1635   }
1636
  // Only the first body line produces Indent; later lines at the same level
  // lex their leading indentation as a Whitespace token.
1637   test! {
1638     name: tokenize_indented_block,
1639     text: "
1640 foo:
1641   a
1642   b
1643   c
1644 ",
1645     tokens: (
1646       Identifier:"foo",
1647       Colon,
1648       Eol,
1649       Indent,
1650       Text:"a",
1651       Eol,
1652       Whitespace:"  ",
1653       Text:"b",
1654       Eol,
1655       Whitespace:"  ",
1656       Text:"c",
1657       Eol,
1658       Dedent,
1659     ),
1660   }
1661
  // Quotes of the opposite kind are ordinary characters inside a string, and a
  // `#` immediately after a string starts a comment that runs to end-of-line.
1662   test! {
1663     name: tokenize_strings,
1664     text: r#"a = "'a'" + '"b"' + "'c'" + '"d"'#echo hello"#,
1665     tokens: (
1666       Identifier:"a",
1667       Whitespace,
1668       Equals,
1669       Whitespace,
1670       StringToken:"\"'a'\"",
1671       Whitespace,
1672       Plus,
1673       Whitespace,
1674       StringToken:"'\"b\"'",
1675       Whitespace,
1676       Plus,
1677       Whitespace,
1678       StringToken:"\"'c'\"",
1679       Whitespace,
1680       Plus,
1681       Whitespace,
1682       StringToken:"'\"d\"'",
1683       Comment:"#echo hello",
1684     )
1685   }
1686
  // A comment after the recipe header doesn't prevent the following indented
  // interpolation line from lexing normally.
1687   test! {
1688     name: tokenize_recipe_interpolation_eol,
1689     text: "
1690 foo: # some comment
1691  {{hello}}
1692 ",
1693     tokens: (
1694       Identifier:"foo",
1695       Colon,
1696       Whitespace,
1697       Comment:"# some comment",
1698       Eol,
1699       Indent:" ",
1700       InterpolationStart,
1701       Identifier:"hello",
1702       InterpolationEnd,
1703       Eol,
1704       Dedent,
1705     ),
1706   }
1707
  // Dedent is emitted when the recipe body ends at an unindented comment line,
  // which then lexes as a top-level Comment.
1708   test! {
1709     name: tokenize_recipe_interpolation_eof,
1710     text: "foo: # more comments
1711  {{hello}}
1712# another comment
1713",
1714     tokens: (
1715       Identifier:"foo",
1716       Colon,
1717       Whitespace,
1718       Comment:"# more comments",
1719       Eol,
1720       Indent:" ",
1721       InterpolationStart,
1722       Identifier:"hello",
1723       InterpolationEnd,
1724       Eol,
1725       Dedent,
1726       Comment:"# another comment",
1727       Eol,
1728     ),
1729   }
1730
  // An interpolation may hold a multi-term expression mixing identifiers and
  // string literals.
1731   test! {
1732     name: tokenize_recipe_complex_interpolation_expression,
1733     text: "foo: #lol\n {{a + b + \"z\" + blarg}}",
1734     tokens: (
1735       Identifier:"foo",
1736       Colon,
1737       Whitespace:" ",
1738       Comment:"#lol",
1739       Eol,
1740       Indent:" ",
1741       InterpolationStart,
1742       Identifier:"a",
1743       Whitespace,
1744       Plus,
1745       Whitespace,
1746       Identifier:"b",
1747       Whitespace,
1748       Plus,
1749       Whitespace,
1750       StringToken:"\"z\"",
1751       Whitespace,
1752       Plus,
1753       Whitespace,
1754       Identifier:"blarg",
1755       InterpolationEnd,
1756       Dedent,
1757     ),
1758   }
1759
  // Several interpolations on one recipe line: literal text between `}}` and
  // the next `{{` lexes as Text.
1760   test! {
1761     name: tokenize_recipe_multiple_interpolations,
1762     text: "foo:,#ok\n {{a}}0{{b}}1{{c}}",
1763     tokens: (
1764       Identifier:"foo",
1765       Colon,
1766       Comma,
1767       Comment:"#ok",
1768       Eol,
1769       Indent:" ",
1770       InterpolationStart,
1771       Identifier:"a",
1772       InterpolationEnd,
1773       Text:"0",
1774       InterpolationStart,
1775       Identifier:"b",
1776       InterpolationEnd,
1777       Text:"1",
1778       InterpolationStart,
1779       Identifier:"c",
1780       InterpolationEnd,
1781       Dedent,
1782
1783     ),
1784   }
1785
  // Lexing is permissive about grammar: a nonsense line still tokenizes; the
  // parser, not the lexer, is what would reject it.
1786   test! {
1787     name: tokenize_junk,
1788     text: "
1789 bob
1790
1791 hello blah blah blah : a b c #whatever
1792 ",
1793     tokens: (
1794       Identifier:"bob",
1795       Eol,
1796       Eol,
1797       Identifier:"hello",
1798       Whitespace,
1799       Identifier:"blah",
1800       Whitespace,
1801       Identifier:"blah",
1802       Whitespace,
1803       Identifier:"blah",
1804       Whitespace,
1805       Colon,
1806       Whitespace,
1807       Identifier:"a",
1808       Whitespace,
1809       Identifier:"b",
1810       Whitespace,
1811       Identifier:"c",
1812       Whitespace,
1813       Comment:"#whatever",
1814       Eol,
1815     )
1816   }
1817
  // Blank lines inside a recipe body lex as bare Eol and do not end the body;
  // the Dedent only appears once the unindented trailing comment is reached.
1818   test! {
1819     name: tokenize_empty_lines,
1820     text: "
1821
1822 # this does something
1823 hello:
1824   asdf
1825   bsdf
1826
1827   csdf
1828
1829   dsdf # whatever
1830
1831 # yolo
1832 ",
1833     tokens: (
1834       Eol,
1835       Comment:"# this does something",
1836       Eol,
1837       Identifier:"hello",
1838       Colon,
1839       Eol,
1840       Indent,
1841       Text:"asdf",
1842       Eol,
1843       Whitespace:"  ",
1844       Text:"bsdf",
1845       Eol,
1846       Eol,
1847       Whitespace:"  ",
1848       Text:"csdf",
1849       Eol,
1850       Eol,
1851       Whitespace:"  ",
1852       Text:"dsdf # whatever",
1853       Eol,
1854       Eol,
1855       Dedent,
1856       Comment:"# yolo",
1857       Eol,
1858     ),
1859   }
1860
  // A bare `#` comment line before an assignment must not swallow the
  // following lines.
1861   test! {
1862     name: tokenize_comment_before_variable,
1863     text: "
1864 #
1865 A='1'
1866 echo:
1867   echo {{A}}
1868 ",
1869     tokens: (
1870       Comment:"#",
1871       Eol,
1872       Identifier:"A",
1873       Equals,
1874       StringToken:"'1'",
1875       Eol,
1876       Identifier:"echo",
1877       Colon,
1878       Eol,
1879       Indent,
1880       Text:"echo ",
1881       InterpolationStart,
1882       Identifier:"A",
1883       InterpolationEnd,
1884       Eol,
1885       Dedent,
1886     ),
1887   }
1888
  // NOTE(review): appears identical to `interpolation_expression` above —
  // consider deduplicating.
1889   test! {
1890     name: tokenize_interpolation_backticks,
1891     text: "hello:\n echo {{`echo hello` + `echo goodbye`}}",
1892     tokens: (
1893       Identifier:"hello",
1894       Colon,
1895       Eol,
1896       Indent:" ",
1897       Text:"echo ",
1898       InterpolationStart,
1899       Backtick:"`echo hello`",
1900       Whitespace,
1901       Plus,
1902       Whitespace,
1903       Backtick:"`echo goodbye`",
1904       InterpolationEnd,
1905       Dedent
1906     ),
1907   }
1908
  // NOTE(review): appears identical to `interpolation_empty` above — consider
  // deduplicating.
1909   test! {
1910     name: tokenize_empty_interpolation,
1911     text: "hello:\n echo {{}}",
1912     tokens: (
1913       Identifier:"hello",
1914       Colon,
1915       Eol,
1916       Indent:" ",
1917       Text:"echo ",
1918       InterpolationStart,
1919       InterpolationEnd,
1920       Dedent,
1921     ),
1922   }
1923
  // Backticks are full-fledged expression atoms in assignments, not just
  // inside interpolations.
1924   test! {
1925     name: tokenize_assignment_backticks,
1926     text: "a = `echo hello` + `echo goodbye`",
1927     tokens: (
1928       Identifier:"a",
1929       Whitespace,
1930       Equals,
1931       Whitespace,
1932       Backtick:"`echo hello`",
1933       Whitespace,
1934       Plus,
1935       Whitespace,
1936       Backtick:"`echo goodbye`",
1937     ),
1938   }
1939
  // Multiple recipes with blank and whitespace-only (`\t`) separator lines;
  // the whitespace-only line between recipes still lexes as a bare Eol.
1940   test! {
1941     name: tokenize_multiple,
1942     text: "
1943
1944 hello:
1945   a
1946   b
1947
1948   c
1949
1950   d
1951
1952 # hello
1953 bob:
1954   frank
1955 \t
1956 ",
1957     tokens: (
1958       Eol,
1959       Identifier:"hello",
1960       Colon,
1961       Eol,
1962       Indent,
1963       Text:"a",
1964       Eol,
1965       Whitespace:"  ",
1966       Text:"b",
1967       Eol,
1968       Eol,
1969       Whitespace:"  ",
1970       Text:"c",
1971       Eol,
1972       Eol,
1973       Whitespace:"  ",
1974       Text:"d",
1975       Eol,
1976       Eol,
1977       Dedent,
1978       Comment:"# hello",
1979       Eol,
1980       Identifier:"bob",
1981       Colon,
1982       Eol,
1983       Indent:"  ",
1984       Text:"frank",
1985       Eol,
1986       Eol,
1987       Dedent,
1988     ),
1989   }
1990
  // An empty comment (`#` with nothing after it) is a valid Comment token.
1991   test! {
1992     name: tokenize_comment,
1993     text: "a:=#",
1994     tokens: (
1995       Identifier:"a",
1996       ColonEquals,
1997       Comment:"#",
1998     ),
1999   }
2000
  // `!` has token meaning elsewhere (`!=`, `!~`) but is plain text inside a
  // comment.
2001   test! {
2002     name: tokenize_comment_with_bang,
2003     text: "a:=#foo!",
2004     tokens: (
2005       Identifier:"a",
2006       ColonEquals,
2007       Comment:"#foo!",
2008     ),
2009   }
2010
  // NOTE(review): this test appears byte-identical to `indented_blocks` above —
  // consider deduplicating.
2011   test! {
2012     name: tokenize_order,
2013     text: "
2014 b: a
2015   @mv a b
2016
2017 a:
2018   @touch F
2019   @touch a
2020
2021 d: c
2022   @rm c
2023
2024 c: b
2025   @mv b c
2026 ",
2027     tokens: (
2028       Identifier:"b",
2029       Colon,
2030       Whitespace,
2031       Identifier:"a",
2032       Eol,
2033       Indent,
2034       Text:"@mv a b",
2035       Eol,
2036       Eol,
2037       Dedent,
2038       Identifier:"a",
2039       Colon,
2040       Eol,
2041       Indent,
2042       Text:"@touch F",
2043       Eol,
2044       Whitespace:"  ",
2045       Text:"@touch a",
2046       Eol,
2047       Eol,
2048       Dedent,
2049       Identifier:"d",
2050       Colon,
2051       Whitespace,
2052       Identifier:"c",
2053       Eol,
2054       Indent,
2055       Text:"@rm c",
2056       Eol,
2057       Eol,
2058       Dedent,
2059       Identifier:"c",
2060       Colon,
2061       Whitespace,
2062       Identifier:"b",
2063       Eol,
2064       Indent,
2065       Text:"@mv b c",
2066       Eol,
2067       Dedent,
2068     ),
2069   }
2070
  // Parentheses lex individually; the lexer itself doesn't require them to
  // balance at this stage (the trailing `(` produces no error here).
2071   test! {
2072     name: tokenize_parens,
2073     text: "((())) ()abc(+",
2074     tokens: (
2075       ParenL,
2076       ParenL,
2077       ParenL,
2078       ParenR,
2079       ParenR,
2080       ParenR,
2081       Whitespace,
2082       ParenL,
2083       ParenR,
2084       Identifier:"abc",
2085       ParenL,
2086       Plus,
2087     ),
2088   }
2089
  // CRLF line endings: the whole "\r\n" pair is one Eol token.
2090   test! {
2091     name: crlf_newline,
2092     text: "#\r\n#asdf\r\n",
2093     tokens: (
2094       Comment:"#",
2095       Eol:"\r\n",
2096       Comment:"#asdf",
2097       Eol:"\r\n",
2098     ),
2099   }
2100
  // A dedent followed immediately by a new recipe header, with no blank line
  // between them.
2101   test! {
2102     name: multiple_recipes,
2103     text: "a:\n  foo\nb:",
2104     tokens: (
2105       Identifier:"a",
2106       Colon,
2107       Eol,
2108       Indent:"  ",
2109       Text:"foo",
2110       Eol,
2111       Dedent,
2112       Identifier:"b",
2113       Colon,
2114     ),
2115   }
2116
2117   test! {
2118     name: brackets,
2119     text: "[][]",
2120     tokens: (BracketL, BracketR, BracketL, BracketR),
2121   }
2122
  // Inside an open delimiter a newline is not an Eol: it lexes as Whitespace,
  // so expressions may span lines.
2123   test! {
2124     name: open_delimiter_eol,
2125     text: "[\n](\n){\n}",
2126     tokens: (
2127       BracketL, Whitespace:"\n", BracketR,
2128       ParenL, Whitespace:"\n", ParenR,
2129       BraceL, Whitespace:"\n", BraceR
2130     ),
2131   }
2132
  // An `f` prefix with no `{{…}}` inside is still a plain StringToken.
2133   test! {
2134     name: format_string_empty,
2135     text: "f''",
2136     tokens: (
2137       Identifier: "f",
2138       StringToken: "''",
2139     ),
2140   }
2141
  // The opening quote plus first `{{` lex together as FormatStringStart, and
  // the final `}}` plus closing quote as FormatStringEnd.
2142   test! {
2143     name: format_string_identifier,
2144     text: "f'{{foo}}'",
2145     tokens: (
2146       Identifier: "f",
2147       FormatStringStart: "'{{",
2148       Identifier: "foo",
2149       FormatStringEnd: "}}'",
2150     ),
2151   }
2152
  // Literal text between two interpolations lexes as a single
  // FormatStringContinue token spanning `}}…{{`.
2153   test! {
2154     name: format_string_continue,
2155     text: "f'{{foo}}bar{{baz}}'",
2156     tokens: (
2157       Identifier: "f",
2158       FormatStringStart: "'{{",
2159       Identifier: "foo",
2160       FormatStringContinue: "}}bar{{",
2161       Identifier: "baz",
2162       FormatStringEnd: "}}'",
2163     ),
2164   }
2165
  // Whitespace between `f` and the quote breaks the format-string form: the
  // literal lexes as an ordinary StringToken with `{{…}}` left verbatim.
2166   test! {
2167     name: format_string_whitespace,
2168     text: "f '{{foo}}'",
2169     tokens: (
2170       Identifier: "f",
2171       Whitespace,
2172       StringToken: "'{{foo}}'",
2173     ),
2174   }
2175
  // Only the identifier `f` triggers format-string lexing; any other prefix
  // yields a plain string.
2176   test! {
2177     name: format_string_wrong_identifier,
2178     text: "g'{{foo}}'",
2179     tokens: (
2180       Identifier: "g",
2181       StringToken: "'{{foo}}'",
2182     ),
2183   }
2184
  // Format-string state must be fully reset at the closing quote so the
  // following line lexes as a normal recipe header.
2185   test! {
2186     name: format_string_followed_by_recipe,
2187     text: "foo := f'{{'foo'}}{{'bar'}}'\nbar:",
2188     tokens: (
2189       Identifier: "foo",
2190       Whitespace: " ",
2191       ColonEquals: ":=",
2192       Whitespace: " ",
2193       Identifier: "f",
2194       FormatStringStart: "'{{",
2195       StringToken: "'foo'",
2196       FormatStringContinue: "}}{{",
2197       StringToken: "'bar'",
2198       FormatStringEnd: "}}'",
2199       Eol: "\n",
2200       Identifier: "bar",
2201       Colon,
2202     ),
2203   }
2204
  // Same as above, but with triple-quoted (indented) format-string delimiters.
2205   test! {
2206     name: indented_format_string_followed_by_recipe,
2207     text: "foo := f'''{{'foo'}}{{'bar'}}'''\nbar:",
2208     tokens: (
2209       Identifier: "foo",
2210       Whitespace: " ",
2211       ColonEquals: ":=",
2212       Whitespace: " ",
2213       Identifier: "f",
2214       FormatStringStart: "'''{{",
2215       StringToken: "'foo'",
2216       FormatStringContinue: "}}{{",
2217       StringToken: "'bar'",
2218       FormatStringEnd: "}}'''",
2219       Eol: "\n",
2220       Identifier: "bar",
2221       Colon,
2222     ),
2223   }
2224
  // Error cases: each `error!` invocation checks the error kind plus the exact
  // offset/line/column/width of the offending token. All positions are
  // 0-indexed byte offsets into `input`.
2225   error! {
2226     name:  tokenize_space_then_tab,
2227     input: "a:
2228  0
2229  1
2230\t2
2231",
2232     offset: 9,
2233     line:   3,
2234     column: 0,
2235     width:  1,
2236     kind:   InconsistentLeadingWhitespace{expected: " ", found: "\t"},
2237   }
2238
2239   error! {
2240     name:  tokenize_tabs_then_tab_space,
2241     input: "a:
2242\t\t0
2243\t\t 1
2244\t 2
2245",
2246     offset: 12,
2247     line:   3,
2248     column: 0,
2249     width:  3,
2250     kind:   InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "},
2251   }
2252
2253   error! {
2254     name:   tokenize_unknown,
2255     input:  "%",
2256     offset: 0,
2257     line:   0,
2258     column: 0,
2259     width:  1,
2260     kind:   UnknownStartOfToken { start: '%'},
2261   }
2262
  // The error points at the opening quote (offset 4), not at end-of-input.
2263   error! {
2264     name:   unterminated_string_with_escapes,
2265     input:  r#"a = "\n\t\r\"\\"#,
2266     offset: 4,
2267     line:   0,
2268     column: 4,
2269     width:  1,
2270     kind:   UnterminatedString,
2271   }
2272
2273   error! {
2274     name:   unterminated_raw_string,
2275     input:  "r a='asdf",
2276     offset: 4,
2277     line:   0,
2278     column: 4,
2279     width:  1,
2280     kind:   UnterminatedString,
2281   }
2282
  // Width 2 covers the `{{` that opened the unterminated interpolation.
2283   error! {
2284     name:   unterminated_interpolation,
2285     input:  "foo:\n echo {{
2286 ",
2287     offset: 11,
2288     line:   1,
2289     column: 6,
2290     width:  2,
2291     kind:   UnterminatedInterpolation,
2292   }
2293
2294   error! {
2295     name:   unterminated_backtick,
2296     input:  "`echo",
2297     offset: 0,
2298     line:   0,
2299     column: 0,
2300     width:  1,
2301     kind:   UnterminatedBacktick,
2302   }
2303
  // A `\r` not followed by `\n` is rejected outright.
2304   error! {
2305     name:   unpaired_carriage_return,
2306     input:  "foo\rbar",
2307     offset: 3,
2308     line:   0,
2309     column: 3,
2310     width:  1,
2311     kind:   UnpairedCarriageReturn,
2312   }
2313
  // Dashes and digits are valid *inside* identifiers but not as the first
  // character.
2314   error! {
2315     name:   invalid_name_start_dash,
2316     input:  "-foo",
2317     offset: 0,
2318     line:   0,
2319     column: 0,
2320     width:  1,
2321     kind:   UnknownStartOfToken{ start: '-'},
2322   }
2323
2324   error! {
2325     name:   invalid_name_start_digit,
2326     input:  "0foo",
2327     offset: 0,
2328     line:   0,
2329     column: 0,
2330     width:  1,
2331     kind:   UnknownStartOfToken { start: '0' },
2332   }
2333
2334   error! {
2335     name:   unterminated_string,
2336     input:  r#"a = ""#,
2337     offset: 4,
2338     line:   0,
2339     column: 4,
2340     width:  1,
2341     kind:   UnterminatedString,
2342   }
2343
  // Tabs and spaces may not be mixed within a single indentation prefix.
2344   error! {
2345     name:   mixed_leading_whitespace_recipe,
2346     input:  "a:\n\t echo hello",
2347     offset: 3,
2348     line:   1,
2349     column: 0,
2350     width:  2,
2351     kind:   MixedLeadingWhitespace{whitespace: "\t "},
2352   }
2353
2354   error! {
2355     name:   mixed_leading_whitespace_normal,
2356     input:  "a\n\t echo hello",
2357     offset: 2,
2358     line:   1,
2359     column: 0,
2360     width:  2,
2361     kind:   MixedLeadingWhitespace{whitespace: "\t "},
2362   }
2363
2364   error! {
2365     name:   mixed_leading_whitespace_indent,
2366     input:  "a\n foo\n \tbar",
2367     offset: 7,
2368     line:   2,
2369     column: 0,
2370     width:  2,
2371     kind:   MixedLeadingWhitespace{whitespace: " \t"},
2372   }
2373
  // A dedent must return to a previously seen indentation level; " " isn't
  // one, since only "  " was on the indentation stack.
2374   error! {
2375     name:   bad_dedent,
2376     input:  "a\n  foo\n bar\n  baz",
2377     offset: 14,
2378     line:   3,
2379     column: 0,
2380     width:  2,
2381     kind:   InconsistentLeadingWhitespace{expected: "  ", found: " "},
2382   }
2383
2384   error! {
2385     name:   unclosed_interpolation_delimiter,
2386     input:  "a:\n echo {{ foo",
2387     offset: 9,
2388     line:   1,
2389     column: 6,
2390     width:  2,
2391     kind:   UnterminatedInterpolation,
2392   }
2393
2394   error! {
2395     name:   unexpected_character_after_at,
2396     input:  "@%",
2397     offset: 1,
2398     line:   0,
2399     column: 1,
2400     width:  1,
2401     kind:   UnknownStartOfToken { start: '%'},
2402   }
2403
  // Mismatch errors report the delimiter that was open and the line it was
  // opened on (0-indexed).
2404   error! {
2405     name: mismatched_closing_brace,
2406     input: "(]",
2407     offset: 1,
2408     line: 0,
2409     column: 1,
2410     width: 0,
2411     kind: MismatchedClosingDelimiter {
2412       open: Delimiter::Paren,
2413       close: Delimiter::Bracket,
2414       open_line: 0,
2415     },
2416   }
2417
  // `&` must be followed by another `&`; hitting EOF instead is a
  // zero-width UnexpectedEndOfToken error.
2418   error! {
2419     name: ampersand_eof,
2420     input: "&",
2421     offset: 1,
2422     line: 0,
2423     column: 1,
2424     width: 0,
2425     kind: UnexpectedEndOfToken {
2426       expected: vec!['&'],
2427     },
2428   }
2429
2430   error! {
2431     name: ampersand_unexpected,
2432     input: "&%",
2433     offset: 1,
2434     line: 0,
2435     column: 1,
2436     width: 1,
2437     kind: UnexpectedCharacter {
2438       expected: vec!['&'],
2439     },
2440   }
2441
  // `!` must be followed by `=` or `~`.
2442   error! {
2443     name: bang_eof,
2444     input: "!",
2445     offset: 1,
2446     line: 0,
2447     column: 1,
2448     width: 0,
2449     kind: UnexpectedEndOfToken {
2450       expected: vec!['=', '~'],
2451     },
2452   }
2453
  // A `(` left open inside an interpolation is reported when the `}}` is
  // reached, as a mismatched Brace/Paren pair.
2454   error! {
2455     name: unclosed_parenthesis_in_interpolation,
2456     input: "a:\n echo {{foo(}}",
2457     offset: 15,
2458     line: 1,
2459     column: 12,
2460     width: 0,
2461     kind: MismatchedClosingDelimiter {
2462       close: Delimiter::Brace,
2463       open: Delimiter::Paren,
2464       open_line: 1,
2465     },
2466   }
2467
2468   #[test]
  // Presuming the wrong character must produce an internal error, with a
  // zero-length token at the lexer's current position and the full
  // "may indicate a bug" message in the rendered output.
2469   fn presume_error() {
    // `presume('-')` on input "!" fails because the next character is '!'.
2470     let compile_error = Lexer::new("justfile".as_ref(), "!")
2471       .presume('-')
2472       .unwrap_err();
    // The error token is zero-width at offset 0 with kind Unspecified.
2473     assert_matches!(
2474       compile_error.token,
2475       Token {
2476         offset: 0,
2477         line: 0,
2478         column: 0,
2479         length: 0,
2480         src: "!",
2481         kind: Unspecified,
2482         path: _,
2483       }
2484     );
2485     assert_matches!(&*compile_error.kind,
2486       Internal { message }
2487       if message == "Lexer presumed character `-`"
2488     );
2489
    // Check the full user-facing rendering, including the issue-filing hint
    // and the source-context arrow display (1-indexed position in output).
2490     assert_eq!(
2491       Error::Compile { compile_error }
2492         .color_display(Color::never())
2493         .to_string(),
2494       "error: Internal error, this may indicate a bug in just: Lexer presumed character `-`
2495consider filing an issue: https://github.com/casey/just/issues/new
2496 ——▶ justfile:1:1
2497  │
24981 │ !
2499  │ ^"
2500     );
2501   }
2502}