1pub mod token;
2
3use nom::Parser;
4use nom::bytes::complete::{is_not, take_until, take_while1};
5use nom::character::complete::{digit1, line_ending};
6use nom::combinator::{cut, opt};
7use nom::{
8 IResult,
9 branch::alt,
10 bytes::complete::{escaped_transform, tag, take_while_m_n},
11 character::complete::{alpha1, alphanumeric1, anychar, char, multispace0, none_of, satisfy},
12 combinator::{map, map_opt, map_res, recognize, value},
13 multi::{many0, many1},
14 sequence::{delimited, pair, preceded},
15};
16use nom_locate::{LocatedSpan, position};
17use smol_str::SmolStr;
18use token::{StringSegment, Token, TokenKind};
19
20use crate::error::syntax::SyntaxError;
21use crate::module::ModuleId;
22use crate::number::Number;
23use crate::range::Range;
24
25const MARKDOWN: &str = ".";
26
27type Span<'a> = LocatedSpan<&'a str, ModuleId>;
28
/// Generates a parser function named `$name` that matches the literal `$tag`
/// and yields a `Token` of kind `$kind` whose range is the matched text.
macro_rules! define_token_parser {
    ($name:ident, $tag:expr, $kind:expr) => {
        fn $name(input: Span) -> IResult<Span, Token> {
            let (rest, matched): (Span, Span) = tag($tag)(input)?;
            // Read the module id before the span is converted into a range.
            let module_id = matched.extra;
            let token = Token {
                range: matched.into(),
                kind: $kind,
                module_id,
            };
            Ok((rest, token))
        }
    };
}
44
/// Switches controlling how the lexer treats trivia and leftover input.
///
/// Both flags default to `false`.
#[derive(Clone, Debug, Default)]
pub struct Options {
    /// When set, `Lexer::tokenize` returns the tokens lexed so far (plus the
    /// trailing `Eof`) even if part of the input could not be tokenized.
    pub ignore_errors: bool,
    /// When set, newlines, runs of spaces, runs of tabs and `#` comments are
    /// emitted as tokens instead of being skipped.
    pub include_spaces: bool,
}
50
51pub struct Lexer {
52 options: Options,
53}
54
55impl Lexer {
56 pub fn new(options: Options) -> Self {
57 Self { options }
58 }
59
60 pub fn tokenize(&self, input: &str, module_id: ModuleId) -> Result<Vec<Token>, SyntaxError> {
61 match tokens(Span::new_extra(input, module_id), &self.options) {
62 Ok((span, mut tokens)) => {
63 let eof: Range = span.into();
64
65 if eof.start == eof.end || self.options.ignore_errors {
66 tokens.push(Token {
67 range: eof,
68 kind: TokenKind::Eof,
69 module_id,
70 });
71 Ok(tokens)
72 } else {
73 Err(SyntaxError::UnexpectedToken(Token {
74 range: eof,
75 kind: TokenKind::Eof,
76 module_id,
77 }))
78 }
79 }
80 Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => Err(SyntaxError::UnexpectedToken(Token {
81 range: e.input.into(),
82 kind: TokenKind::Eof,
83 module_id,
84 })),
85 Err(_) => Err(SyntaxError::UnexpectedToken(Token {
86 range: Range::default(),
87 kind: TokenKind::Eof,
88 module_id,
89 })),
90 }
91 }
92}
93
94fn unicode(input: Span) -> IResult<Span, char> {
95 map_opt(
96 map_res(
97 preceded(
98 char('u'),
99 delimited(
100 char('{'),
101 take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()),
102 char('}'),
103 ),
104 ),
105 |span: Span| u32::from_str_radix(span.fragment(), 16),
106 ),
107 char::from_u32,
108 )
109 .parse(input)
110}
111
112fn unicode4(input: Span) -> IResult<Span, char> {
114 map_opt(
115 map_res(
116 preceded(char('u'), take_while_m_n(4, 4, |c: char| c.is_ascii_hexdigit())),
117 |span: Span| u32::from_str_radix(span.fragment(), 16),
118 ),
119 char::from_u32,
120 )
121 .parse(input)
122}
123
124fn hex_escape(input: Span) -> IResult<Span, char> {
125 map_opt(
126 map_res(
127 preceded(char('x'), take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit())),
128 |span: Span| u8::from_str_radix(span.fragment(), 16),
129 ),
130 |byte| char::from_u32(byte as u32),
131 )
132 .parse(input)
133}
134
135fn inline_comment(input: Span) -> IResult<Span, Token> {
136 let (span, _) = char('#')(input)?;
137 let (span, start) = position(span)?;
138 let (span, comment_text) = opt(is_not("\n\r")).parse(span)?;
139 let (span, end) = position(span)?;
140
141 let module_id = start.extra;
142 let comment_str = comment_text.map(|s: Span| s.fragment().to_string()).unwrap_or_default();
143
144 Ok((
145 span,
146 Token {
147 range: Range {
148 start: start.into(),
149 end: end.into(),
150 },
151 kind: TokenKind::Comment(comment_str),
152 module_id,
153 },
154 ))
155}
156
157fn skip_comment(input: Span) -> IResult<Span, ()> {
159 let (span, _) = char('#')(input)?;
160 let (span, _) = opt(is_not("\n\r")).parse(span)?;
161 Ok((span, ()))
162}
163
164fn newline(input: Span) -> IResult<Span, Token> {
165 map(line_ending, |span: Span| {
166 let module_id = span.extra;
167 Token {
168 range: span.into(),
169 kind: TokenKind::NewLine,
170 module_id,
171 }
172 })
173 .parse(input)
174}
175
176fn tab(input: Span) -> IResult<Span, Token> {
177 map(take_while1(|c| c == '\t'), |span: Span| {
178 let module_id = span.extra;
179 let num = span.fragment().len();
180 Token {
181 range: span.into(),
182 kind: TokenKind::Tab(num),
183 module_id,
184 }
185 })
186 .parse(input)
187}
188
189fn spaces(input: Span) -> IResult<Span, Token> {
190 map(take_while1(|c| c == ' '), |span: Span| {
191 let module_id = span.extra;
192 let num = span.fragment().len();
193 Token {
194 range: span.into(),
195 kind: TokenKind::Whitespace(num),
196 module_id,
197 }
198 })
199 .parse(input)
200}
201
202define_token_parser!(colon, ":", TokenKind::Colon);
203define_token_parser!(comma, ",", TokenKind::Comma);
204define_token_parser!(double_colon, "::", TokenKind::DoubleColon);
205define_token_parser!(empty_string, "\"\"", TokenKind::StringLiteral(String::new()));
206define_token_parser!(eq_eq, "==", TokenKind::EqEq);
207define_token_parser!(equal, "=", TokenKind::Equal);
208define_token_parser!(l_bracket, "[", TokenKind::LBracket);
209define_token_parser!(l_paren, "(", TokenKind::LParen);
210define_token_parser!(l_brace, "{", TokenKind::LBrace);
211define_token_parser!(asterisk, "*", TokenKind::Asterisk);
212define_token_parser!(minus, "-", TokenKind::Minus);
213define_token_parser!(slash, "/", TokenKind::Slash);
214define_token_parser!(ne_eq, "!=", TokenKind::NeEq);
215define_token_parser!(plus, "+", TokenKind::Plus);
216define_token_parser!(pipe, "|", TokenKind::Pipe);
217define_token_parser!(percent, "%", TokenKind::Percent);
218define_token_parser!(range_op, "..", TokenKind::DoubleDot);
219define_token_parser!(r_bracket, "]", TokenKind::RBracket);
220define_token_parser!(r_paren, ")", TokenKind::RParen);
221define_token_parser!(r_brace, "}", TokenKind::RBrace);
222define_token_parser!(semi_colon, ";", TokenKind::SemiColon);
223define_token_parser!(lt, "<", TokenKind::Lt);
224define_token_parser!(lte, "<=", TokenKind::Lte);
225define_token_parser!(gt, ">", TokenKind::Gt);
226define_token_parser!(gte, ">=", TokenKind::Gte);
227define_token_parser!(and, "&&", TokenKind::And);
228define_token_parser!(or, "||", TokenKind::Or);
229define_token_parser!(not, "!", TokenKind::Not);
230define_token_parser!(question, "?", TokenKind::Question);
231define_token_parser!(coalesce, "??", TokenKind::Coalesce);
232define_token_parser!(plus_equal, "+=", TokenKind::PlusEqual);
233define_token_parser!(minus_equal, "-=", TokenKind::MinusEqual);
234define_token_parser!(star_equal, "*=", TokenKind::StarEqual);
235define_token_parser!(slash_equal, "/=", TokenKind::SlashEqual);
236define_token_parser!(percent_equal, "%=", TokenKind::PercentEqual);
237define_token_parser!(double_slash_equal, "//=", TokenKind::DoubleSlashEqual);
238define_token_parser!(pipe_equal, "|=", TokenKind::PipeEqual);
239define_token_parser!(tilde_equal, "=~", TokenKind::TildeEqual);
240define_token_parser!(not_tilde_equal, "!~", TokenKind::NotTildeEqual);
241define_token_parser!(left_shift, "<<", TokenKind::LeftShift);
242define_token_parser!(right_shift, ">>", TokenKind::RightShift);
243define_token_parser!(convert_op, "@", TokenKind::Convert);
244define_token_parser!(arrow, "->", TokenKind::Arrow);
245
246fn punctuations(input: Span) -> IResult<Span, Token> {
247 alt((
248 and,
249 or,
250 l_paren,
251 r_paren,
252 l_brace,
253 r_brace,
254 comma,
255 double_colon,
256 colon,
257 semi_colon,
258 l_bracket,
259 r_bracket,
260 coalesce,
261 question,
262 pipe,
263 ))
264 .parse(input)
265}
266
267fn lambda_op(input: Span) -> IResult<Span, Token> {
268 alt((arrow,)).parse(input)
269}
270
271fn assignment_op(input: Span) -> IResult<Span, Token> {
272 alt((
273 plus_equal,
274 minus_equal,
275 star_equal,
276 slash_equal,
277 percent_equal,
278 double_slash_equal,
279 pipe_equal,
280 ))
281 .parse(input)
282}
283
284fn binary_op(input: Span) -> IResult<Span, Token> {
285 alt((
286 convert_op,
287 assignment_op,
288 eq_eq,
289 ne_eq,
290 left_shift,
291 right_shift,
292 tilde_equal,
293 not_tilde_equal,
294 lte,
295 gte,
296 lt,
297 gt,
298 equal,
299 plus,
300 minus,
301 asterisk,
302 slash,
303 percent,
304 range_op,
305 ))
306 .parse(input)
307}
308
309fn unary_op(input: Span) -> IResult<Span, Token> {
310 alt((not,)).parse(input)
311}
312
313fn number_literal(input: Span) -> IResult<Span, Token> {
314 map_res(
315 recognize(pair(
316 opt(char('-')),
317 recognize((
318 opt(alt((char('+'), char('-')))),
319 alt((
320 map((digit1, opt(pair(char('.'), digit1))), |_| ()),
321 map((char('.'), digit1), |_| ()),
322 )),
323 opt((
324 alt((char('e'), char('E'))),
325 opt(alt((char('+'), char('-')))),
326 cut(digit1),
327 )),
328 )),
329 )),
330 |span: Span| {
331 str::parse(span.fragment()).map(|s| {
332 let module_id = span.extra;
333 Token {
334 range: span.into(),
335 kind: TokenKind::NumberLiteral(Number::new(s)),
336 module_id,
337 }
338 })
339 },
340 )
341 .parse(input)
342}
343
344fn interpolation_expr(input: Span) -> IResult<Span, Span> {
345 delimited(tag("${"), take_until("}"), char('}')).parse(input)
346}
347
348fn string_segment<'a>(input: Span<'a>) -> IResult<Span<'a>, StringSegment> {
349 alt((
350 map(
351 |input: Span<'a>| {
352 let (span, start) = position(input)?;
353 let (span, expr) = interpolation_expr(span)?;
354 let (span, end) = position(span)?;
355 Ok((
356 span,
357 (
358 expr,
359 Range {
360 start: start.into(),
361 end: end.into(),
362 },
363 ),
364 ))
365 },
366 |(expr, range)| StringSegment::Expr(expr.to_string().into(), range),
367 ),
368 map(
369 |input| {
370 let (span, start) = position(input)?;
371 let (span, text) = escaped_transform(
372 none_of("\"\\${"),
373 '\\',
374 alt((
375 value('\\', char('\\')),
376 value('\"', char('\"')),
377 value('\r', char('r')),
378 value('\n', char('n')),
379 value('\t', char('t')),
380 value('{', char('{')),
381 value('}', char('}')),
382 hex_escape,
383 unicode,
384 unicode4,
385 )),
386 )(span)?;
387 let (span, end) = position(span)?;
388 Ok((
389 span,
390 (
391 text,
392 Range {
393 start: start.into(),
394 end: end.into(),
395 },
396 ),
397 ))
398 },
399 |(text, range)| StringSegment::Text(text, range),
400 ),
401 map(
402 |input: Span<'a>| {
403 let (span, start) = position(input)?;
404 let (span, _) = tag("$$")(span)?;
405 let (span, end) = position(span)?;
406 Ok((
407 span,
408 (
409 "$".to_string(),
410 Range {
411 start: start.into(),
412 end: end.into(),
413 },
414 ),
415 ))
416 },
417 |(text, range)| StringSegment::Text(text, range),
418 ),
419 ))
420 .parse(input)
421}
422
423fn byte_escape_seq(input: Span) -> IResult<Span, u8> {
424 preceded(
425 char('\\'),
426 alt((
427 preceded(
428 char('x'),
429 map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()), |hex: Span| {
430 u8::from_str_radix(hex.fragment(), 16)
431 }),
432 ),
433 value(b'\\', char('\\')),
434 value(b'"', char('"')),
435 value(b'\n', char('n')),
436 value(b'\r', char('r')),
437 value(b'\t', char('t')),
438 value(b'\0', char('0')),
439 )),
440 )
441 .parse(input)
442}
443
444fn byte_string_literal(input: Span) -> IResult<Span, Token> {
445 let (span, start) = position(input)?;
446 let (span, _) = tag("b\"")(span)?;
447
448 let (span, byte_segments) = many0(alt((
449 map(byte_escape_seq, |b| vec![b]),
450 map(satisfy(|c: char| c.is_ascii() && c != '"' && c != '\\'), |c| {
453 vec![c as u8]
454 }),
455 )))
456 .parse(span)?;
457
458 let (span, _) = char('"').parse(span)?;
459 let (span, end) = position(span)?;
460 let bytes: Vec<u8> = byte_segments.into_iter().flatten().collect();
461
462 Ok((
463 span,
464 Token {
465 range: Range {
466 start: start.into(),
467 end: end.into(),
468 },
469 kind: TokenKind::BytesLiteral(bytes),
470 module_id: start.extra,
471 },
472 ))
473}
474
475fn interpolated_string(input: Span) -> IResult<Span, Token> {
476 let (span, start) = position(input)?;
477 let (span, _) = tag("s\"")(span)?;
478
479 let mut segments = Vec::with_capacity(4);
480 let mut current = span;
481
482 let (remaining, segment) = string_segment(current)?;
484 segments.push(segment);
485 current = remaining;
486
487 while let Ok((remaining, segment)) = string_segment(current) {
489 segments.push(segment);
490 current = remaining;
491 }
492
493 let (span, _) = char('"')(current)?;
494 let (span, end) = position(span)?;
495 let module_id = start.extra;
496
497 Ok((
498 span,
499 Token {
500 range: Range {
501 start: start.into(),
502 end: end.into(),
503 },
504 kind: TokenKind::InterpolatedString(segments),
505 module_id,
506 },
507 ))
508}
509
510fn string_literal(input: Span) -> IResult<Span, Token> {
511 let (span, start) = position(input)?;
512 let (span, s) = delimited(
513 char('"'),
514 escaped_transform(
515 none_of("\"\\"),
516 '\\',
517 alt((
518 alt((
519 value('\\', char('\\')),
520 value('\"', char('\"')),
521 value('\r', char('r')),
522 value('\n', char('n')),
523 value('\t', char('t')),
524 value('/', char('/')),
525 value('[', char('[')),
526 value(']', char(']')),
527 value('(', char('(')),
528 value(')', char(')')),
529 value('{', char('{')),
530 value('}', char('}')),
531 )),
532 alt((
533 value('+', char('+')),
534 value('*', char('*')),
535 value('?', char('?')),
536 value('^', char('^')),
537 value('$', char('$')),
538 value('|', char('|')),
539 value('-', char('-')),
540 value('.', char('.')),
541 value('s', char('s')), value('S', char('S')), value('d', char('d')), value('D', char('D')), value('w', char('w')), value('W', char('W')), hex_escape,
548 unicode,
549 unicode4,
550 )),
551 )),
552 ),
553 char('"'),
554 )
555 .parse(span)?;
556 let (span, end) = position(span)?;
557 let module_id = start.extra;
558
559 Ok((
560 span,
561 Token {
562 range: Range {
563 start: start.into(),
564 end: end.into(),
565 },
566 kind: TokenKind::StringLiteral(s),
567 module_id,
568 },
569 ))
570}
571
572fn literals(input: Span) -> IResult<Span, Token> {
573 alt((
574 byte_string_literal,
575 string_literal,
576 interpolated_string,
577 empty_string,
578 number_literal,
579 ))
580 .parse(input)
581}
582
583fn selector(input: Span) -> IResult<Span, Token> {
589 map(
590 recognize(pair(
591 tag(MARKDOWN),
592 alt((
593 tag(">"),
594 tag("^"),
595 recognize(pair(
597 char('"'),
598 pair(
599 many0(alt((recognize(pair(char('\\'), anychar)), recognize(none_of("\"\\"))))),
600 char('"'),
601 ),
602 )),
603 recognize(many0(alt((alphanumeric1, tag("_"), tag("-"), tag("*"))))),
604 )),
605 )),
606 |span: Span| {
607 let module_id = span.extra;
608 Token {
609 range: span.into(),
610 kind: TokenKind::Selector(SmolStr::new(span.fragment())),
611 module_id,
612 }
613 },
614 )
615 .parse(input)
616}
617
618fn ident_or_keyword(input: Span) -> IResult<Span, Token> {
626 let (after_base, base_span) =
627 recognize(pair(alt((alpha1, tag("_"))), many0(alt((alphanumeric1, tag("_")))))).parse(input)?;
628
629 let module_id = base_span.extra;
630 let base_frag = *base_span.fragment();
631
632 let next_char = after_base.fragment().chars().next();
633 let at_word_boundary = next_char.map(|c| !c.is_alphanumeric() && c != '_').unwrap_or(true);
636
637 if at_word_boundary {
638 let keyword_kind = match base_frag {
639 "as" => Some(TokenKind::As),
640 "break" => Some(TokenKind::Break),
641 "catch" => Some(TokenKind::Catch),
642 "continue" => Some(TokenKind::Continue),
643 "def" => Some(TokenKind::Def),
644 "do" => Some(TokenKind::Do),
645 "elif" => Some(TokenKind::Elif),
646 "else" => Some(TokenKind::Else),
647 "end" => Some(TokenKind::End),
648 "fn" => Some(TokenKind::Fn),
649 "foreach" => Some(TokenKind::Foreach),
650 "if" => Some(TokenKind::If),
651 "import" => Some(TokenKind::Import),
652 "include" => Some(TokenKind::Include),
653 "let" => Some(TokenKind::Let),
654 "loop" => Some(TokenKind::Loop),
655 "macro" => Some(TokenKind::Macro),
656 "match" => Some(TokenKind::Match),
657 "module" => Some(TokenKind::Module),
658 "nodes" => Some(TokenKind::Nodes),
659 "None" => Some(TokenKind::None),
660 "quote" => Some(TokenKind::Quote),
661 "self" => Some(TokenKind::Self_),
662 "try" => Some(TokenKind::Try),
663 "unquote" => Some(TokenKind::Unquote),
664 "var" => Some(TokenKind::Var),
665 "while" => Some(TokenKind::While),
666 _ => None,
667 };
668
669 if let Some(kind) = keyword_kind {
670 return Ok((
671 after_base,
672 Token {
673 range: base_span.into(),
674 kind,
675 module_id,
676 },
677 ));
678 }
679 }
680
681 if next_char == Some('-') || next_char == Some('*') {
685 let (after_full, full_span) = recognize(pair(
686 alt((alpha1, tag("_"))),
687 many0(alt((alphanumeric1, tag("_"), tag("-"), tag("*")))),
688 ))
689 .parse(input)?;
690
691 let full_frag = *full_span.fragment();
692 let kind = match full_frag {
693 "true" => TokenKind::BoolLiteral(true),
694 "false" => TokenKind::BoolLiteral(false),
695 s => TokenKind::Ident(SmolStr::new(s)),
696 };
697
698 return Ok((
699 after_full,
700 Token {
701 range: full_span.into(),
702 kind,
703 module_id: full_span.extra,
704 },
705 ));
706 }
707
708 let kind = match base_frag {
709 "true" => TokenKind::BoolLiteral(true),
710 "false" => TokenKind::BoolLiteral(false),
711 s => TokenKind::Ident(SmolStr::new(s)),
712 };
713
714 Ok((
715 after_base,
716 Token {
717 range: base_span.into(),
718 kind,
719 module_id,
720 },
721 ))
722}
723
724fn env(input: Span) -> IResult<Span, Token> {
725 preceded(
726 tag("$"),
727 map(recognize(many1(alt((alphanumeric1, tag("_"))))), |span: Span| {
728 let kind = TokenKind::Env(SmolStr::new(span.fragment()));
729 let module_id = span.extra;
730 Token {
731 range: span.into(),
732 kind,
733 module_id,
734 }
735 }),
736 )
737 .parse(input)
738}
739
740fn skip_whitespace_and_comments(input: Span) -> IResult<Span, ()> {
741 let mut current = input;
742 loop {
743 let (remaining, _) = multispace0(current)?;
744 if let Ok((after_comment, ())) = skip_comment(remaining) {
745 current = after_comment;
746 } else {
747 return Ok((remaining, ()));
748 }
749 }
750}
751
752fn token(input: Span) -> IResult<Span, Token> {
753 alt((
754 env,
755 literals,
756 lambda_op,
757 binary_op,
758 punctuations,
759 unary_op,
760 selector,
761 ident_or_keyword,
762 ))
763 .parse(input)
764}
765
766fn token_include_spaces(input: Span) -> IResult<Span, Token> {
767 alt((
768 newline,
769 spaces,
770 tab,
771 inline_comment,
772 env,
773 literals,
774 lambda_op,
775 binary_op,
776 punctuations,
777 unary_op,
778 selector,
779 ident_or_keyword,
780 ))
781 .parse(input)
782}
783
784fn tokens<'a>(input: Span<'a>, options: &'a Options) -> IResult<Span<'a>, Vec<Token>> {
785 let estimated_capacity = input.fragment().len() / 5;
786 let mut tokens = Vec::with_capacity(estimated_capacity.max(16));
787 let mut current = input;
788
789 if options.include_spaces {
790 while let Ok((remaining, token)) = token_include_spaces(current) {
791 tokens.push(token);
792 current = remaining;
793 }
794 } else {
795 loop {
796 let (remaining, _) = skip_whitespace_and_comments(current)?;
797 match token(remaining) {
798 Ok((remaining, tok)) => {
799 tokens.push(tok);
800 current = remaining;
801 }
802 Err(_) => {
803 current = remaining;
804 break;
805 }
806 }
807 }
808 }
809
810 Ok((current, tokens))
811}
812
813#[cfg(test)]
814mod tests {
815 use crate::range::Position;
816
817 use super::*;
818 use rstest::rstest;
819
820 #[rstest]
821 #[case("and(contains(\"test\"))",
822 Options::default(),
823 Ok(vec![
824 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Ident(SmolStr::new("and")), module_id: 1.into()},
825 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::LParen, module_id: 1.into()},
826 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 13} }, kind: TokenKind::Ident(SmolStr::new("contains")), module_id: 1.into()},
827 Token{range: Range { start: Position {line: 1, column: 13}, end: Position {line: 1, column: 14} }, kind: TokenKind::LParen, module_id: 1.into()},
828 Token{range: Range { start: Position {line: 1, column: 14}, end: Position {line: 1, column: 20} }, kind: TokenKind::StringLiteral("test".to_string()), module_id: 1.into()},
829 Token{range: Range { start: Position {line: 1, column: 20}, end: Position {line: 1, column: 21} }, kind: TokenKind::RParen, module_id: 1.into()},
830 Token{range: Range { start: Position {line: 1, column: 21}, end: Position {line: 1, column: 22} }, kind: TokenKind::RParen, module_id: 1.into()},
831 Token{range: Range { start: Position {line: 1, column: 22}, end: Position {line: 1, column: 22} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
832 #[case("and(contains(\"test\")) | or(endswith(\"test\"))",
833 Options::default(),
834 Ok(vec![
835 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Ident(SmolStr::new("and")), module_id: 1.into()},
836 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::LParen, module_id: 1.into()},
837 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 13} }, kind: TokenKind::Ident(SmolStr::new("contains")), module_id: 1.into()},
838 Token{range: Range { start: Position {line: 1, column: 13}, end: Position {line: 1, column: 14} }, kind: TokenKind::LParen, module_id: 1.into()},
839 Token{range: Range { start: Position {line: 1, column: 14}, end: Position {line: 1, column: 20} }, kind: TokenKind::StringLiteral("test".to_string()), module_id: 1.into()},
840 Token{range: Range { start: Position {line: 1, column: 20}, end: Position {line: 1, column: 21} }, kind: TokenKind::RParen, module_id: 1.into()},
841 Token{range: Range { start: Position {line: 1, column: 21}, end: Position {line: 1, column: 22} }, kind: TokenKind::RParen, module_id: 1.into()},
842 Token{range: Range { start: Position {line: 1, column: 23}, end: Position {line: 1, column: 24} }, kind: TokenKind::Pipe, module_id: 1.into()},
843 Token{range: Range { start: Position {line: 1, column: 25}, end: Position {line: 1, column: 27} }, kind: TokenKind::Ident(SmolStr::new("or")), module_id: 1.into()},
844 Token{range: Range { start: Position {line: 1, column: 27}, end: Position {line: 1, column: 28} }, kind: TokenKind::LParen, module_id: 1.into()},
845 Token{range: Range { start: Position {line: 1, column: 28}, end: Position {line: 1, column: 36} }, kind: TokenKind::Ident(SmolStr::new("endswith")), module_id: 1.into()},
846 Token{range: Range { start: Position {line: 1, column: 36}, end: Position {line: 1, column: 37} }, kind: TokenKind::LParen, module_id: 1.into()},
847 Token{range: Range { start: Position {line: 1, column: 37}, end: Position {line: 1, column: 43} }, kind: TokenKind::StringLiteral("test".to_string()), module_id: 1.into()},
848 Token{range: Range { start: Position {line: 1, column: 43}, end: Position {line: 1, column: 44} }, kind: TokenKind::RParen, module_id: 1.into()},
849 Token{range: Range { start: Position {line: 1, column: 44}, end: Position {line: 1, column: 45} }, kind: TokenKind::RParen, module_id: 1.into()},
850 Token{range: Range { start: Position {line: 1, column: 45}, end: Position {line: 1, column: 45} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
851 #[case("eq(length(), 10)",
852 Options::default(),
853 Ok(vec![
854 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::Ident(SmolStr::new("eq")), module_id: 1.into()},
855 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::LParen, module_id: 1.into()},
856 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 10} }, kind: TokenKind::Ident(SmolStr::new("length")), module_id: 1.into()},
857 Token{range: Range { start: Position {line: 1, column: 10}, end: Position {line: 1, column: 11} }, kind: TokenKind::LParen, module_id: 1.into()},
858 Token{range: Range { start: Position {line: 1, column: 11}, end: Position {line: 1, column: 12} }, kind: TokenKind::RParen, module_id: 1.into()},
859 Token{range: Range { start: Position {line: 1, column: 12}, end: Position {line: 1, column: 13} }, kind: TokenKind::Comma, module_id: 1.into()},
860 Token{range: Range { start: Position {line: 1, column: 14}, end: Position {line: 1, column: 16} }, kind: TokenKind::NumberLiteral(10.into()), module_id: 1.into()},
861 Token{range: Range { start: Position {line: 1, column: 16}, end: Position {line: 1, column: 17} }, kind: TokenKind::RParen, module_id: 1.into()},
862 Token{range: Range { start: Position {line: 1, column: 17}, end: Position {line: 1, column: 17} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
863 #[case("or(.h1, .**)",
864 Options::default(),
865 Ok(vec![
866 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::Ident(SmolStr::new("or")), module_id: 1.into()},
867 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::LParen, module_id: 1.into()},
868 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 7} }, kind: TokenKind::Selector(SmolStr::new(".h1")), module_id: 1.into()},
869 Token{range: Range { start: Position {line: 1, column: 7}, end: Position {line: 1, column: 8} }, kind: TokenKind::Comma, module_id: 1.into()},
870 Token{range: Range { start: Position {line: 1, column: 9}, end: Position {line: 1, column: 12} }, kind: TokenKind::Selector(SmolStr::new(".**")), module_id: 1.into()},
871 Token{range: Range { start: Position {line: 1, column: 12}, end: Position {line: 1, column: 13} }, kind: TokenKind::RParen, module_id: 1.into()},
872 Token{range: Range { start: Position {line: 1, column: 13}, end: Position {line: 1, column: 13} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
873 #[case("or(.[][], .[])",
874 Options::default(),
875 Ok(vec![
876 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::Ident(SmolStr::new("or")), module_id: 1.into()},
877 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::LParen, module_id: 1.into()},
878 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::Selector(SmolStr::new(".")), module_id: 1.into()},
879 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 6} }, kind: TokenKind::LBracket, module_id: 1.into()},
880 Token{range: Range { start: Position {line: 1, column: 6}, end: Position {line: 1, column: 7} }, kind: TokenKind::RBracket, module_id: 1.into()},
881 Token{range: Range { start: Position {line: 1, column: 7}, end: Position {line: 1, column: 8} }, kind: TokenKind::LBracket, module_id: 1.into()},
882 Token{range: Range { start: Position {line: 1, column: 8}, end: Position {line: 1, column: 9} }, kind: TokenKind::RBracket, module_id: 1.into()},
883 Token{range: Range { start: Position {line: 1, column: 9}, end: Position {line: 1, column: 10} }, kind: TokenKind::Comma, module_id: 1.into()},
884 Token{range: Range { start: Position {line: 1, column: 11}, end: Position {line: 1, column: 12} }, kind: TokenKind::Selector(SmolStr::new(".")), module_id: 1.into()},
885 Token{range: Range { start: Position {line: 1, column: 12}, end: Position {line: 1, column: 13} }, kind: TokenKind::LBracket, module_id: 1.into()},
886 Token{range: Range { start: Position {line: 1, column: 13}, end: Position {line: 1, column: 14} }, kind: TokenKind::RBracket, module_id: 1.into()},
887 Token{range: Range { start: Position {line: 1, column: 14}, end: Position {line: 1, column: 15} }, kind: TokenKind::RParen, module_id: 1.into()},
888 Token{range: Range { start: Position {line: 1, column: 15}, end: Position {line: 1, column: 15} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
889 #[case("startswith(\"\\u{0061}\")",
890 Options::default(),
891 Ok(vec![
892 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 11} }, kind: TokenKind::Ident(SmolStr::new("startswith")), module_id: 1.into()},
893 Token{range: Range { start: Position {line: 1, column: 11}, end: Position {line: 1, column: 12} }, kind: TokenKind::LParen, module_id: 1.into()},
894 Token{range: Range { start: Position {line: 1, column: 12}, end: Position {line: 1, column: 22} }, kind: TokenKind::StringLiteral("a".to_string()), module_id: 1.into()},
895 Token{range: Range { start: Position {line: 1, column: 22}, end: Position {line: 1, column: 23} }, kind: TokenKind::RParen, module_id: 1.into()},
896 Token{range: Range { start: Position {line: 1, column: 23}, end: Position {line: 1, column: 23} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
897 #[case("endswith($ENV)",
898 Options::default(),
899 Ok(vec![
900 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 9} }, kind: TokenKind::Ident(SmolStr::new("endswith")), module_id: 1.into()},
901 Token{range: Range { start: Position {line: 1, column: 9}, end: Position {line: 1, column: 10} }, kind: TokenKind::LParen, module_id: 1.into()},
902 Token{range: Range { start: Position {line: 1, column: 11}, end: Position {line: 1, column: 14} }, kind: TokenKind::Env(SmolStr::new("ENV")), module_id: 1.into()},
903 Token{range: Range { start: Position {line: 1, column: 14}, end: Position {line: 1, column: 15} }, kind: TokenKind::RParen, module_id: 1.into()},
904 Token{range: Range { start: Position {line: 1, column: 15}, end: Position {line: 1, column: 15} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
905 #[case("def check(arg1, arg2): startswith(\"\\u{0061}\")",
906 Options::default(),
907 Ok(vec![
908 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Def, module_id: 1.into()},
909 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 10} }, kind: TokenKind::Ident(SmolStr::new("check")), module_id: 1.into()},
910 Token{range: Range { start: Position {line: 1, column: 10}, end: Position {line: 1, column: 11} }, kind: TokenKind::LParen, module_id: 1.into()},
911 Token{range: Range { start: Position {line: 1, column: 11}, end: Position {line: 1, column: 15} }, kind: TokenKind::Ident(SmolStr::new("arg1")), module_id: 1.into()},
912 Token{range: Range { start: Position {line: 1, column: 15}, end: Position {line: 1, column: 16} }, kind: TokenKind::Comma, module_id: 1.into()},
913 Token{range: Range { start: Position {line: 1, column: 17}, end: Position {line: 1, column: 21} }, kind: TokenKind::Ident(SmolStr::new("arg2")), module_id: 1.into()},
914 Token{range: Range { start: Position {line: 1, column: 21}, end: Position {line: 1, column: 22} }, kind: TokenKind::RParen, module_id: 1.into()},
915 Token{range: Range { start: Position {line: 1, column: 22}, end: Position {line: 1, column: 23} }, kind: TokenKind::Colon, module_id: 1.into()},
916 Token{range: Range { start: Position {line: 1, column: 24}, end: Position {line: 1, column: 34} }, kind: TokenKind::Ident(SmolStr::new("startswith")), module_id: 1.into()},
917 Token{range: Range { start: Position {line: 1, column: 34}, end: Position {line: 1, column: 35} }, kind: TokenKind::LParen, module_id: 1.into()},
918 Token{range: Range { start: Position {line: 1, column: 35}, end: Position {line: 1, column: 45} }, kind: TokenKind::StringLiteral("a".to_string()), module_id: 1.into()},
919 Token{range: Range { start: Position {line: 1, column: 45}, end: Position {line: 1, column: 46} }, kind: TokenKind::RParen, module_id: 1.into()},
920 Token{range: Range { start: Position {line: 1, column: 46}, end: Position {line: 1, column: 46} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
921 #[case("\"test",
922 Options::default(),
923 Err(SyntaxError::UnexpectedToken(Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 6} }, kind: TokenKind::Eof, module_id: 1.into()})))]
924 #[case::new_line("and(\ncontains(\"test\"))",
925 Options{include_spaces: true, ignore_errors: true},
926 Ok(vec![
927 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Ident(SmolStr::new("and")), module_id: 1.into()},
928 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::LParen, module_id: 1.into()},
929 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 6} }, kind: TokenKind::NewLine, module_id: 1.into()},
930 Token{range: Range { start: Position {line: 2, column: 1}, end: Position {line: 2, column: 9} }, kind: TokenKind::Ident(SmolStr::new("contains")), module_id: 1.into()},
931 Token{range: Range { start: Position {line: 2, column: 9}, end: Position {line: 2, column: 10} }, kind: TokenKind::LParen, module_id: 1.into()},
932 Token{range: Range { start: Position {line: 2, column: 10}, end: Position {line: 2, column: 16} }, kind: TokenKind::StringLiteral("test".to_string()), module_id: 1.into()},
933 Token{range: Range { start: Position {line: 2, column: 16}, end: Position {line: 2, column: 17} }, kind: TokenKind::RParen, module_id: 1.into()},
934 Token{range: Range { start: Position {line: 2, column: 17}, end: Position {line: 2, column: 18} }, kind: TokenKind::RParen, module_id: 1.into()},
935 Token{range: Range { start: Position {line: 2, column: 18}, end: Position {line: 2, column: 18} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
936 #[case("and(\ncontains(\"test\")) | or(\nendswith(\"test\"))",
937 Options{include_spaces: true, ignore_errors: true},
938 Ok(vec![
939 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Ident(SmolStr::new("and")), module_id: 1.into()},
940 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::LParen, module_id: 1.into()},
941 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 6} }, kind: TokenKind::NewLine, module_id: 1.into()},
942 Token{range: Range { start: Position {line: 2, column: 1}, end: Position {line: 2, column: 9} }, kind: TokenKind::Ident(SmolStr::new("contains")), module_id: 1.into()},
943 Token{range: Range { start: Position {line: 2, column: 9}, end: Position {line: 2, column: 10} }, kind: TokenKind::LParen, module_id: 1.into()},
944 Token{range: Range { start: Position {line: 2, column: 10}, end: Position {line: 2, column: 16} }, kind: TokenKind::StringLiteral("test".to_string()), module_id: 1.into()},
945 Token{range: Range { start: Position {line: 2, column: 16}, end: Position {line: 2, column: 17} }, kind: TokenKind::RParen, module_id: 1.into()},
946 Token{range: Range { start: Position {line: 2, column: 17}, end: Position {line: 2, column: 18} }, kind: TokenKind::RParen, module_id: 1.into()},
947 Token{range: Range { start: Position {line: 2, column: 18}, end: Position {line: 2, column: 19} }, kind: TokenKind::Whitespace(1), module_id: 1.into()},
948 Token{range: Range { start: Position {line: 2, column: 19}, end: Position {line: 2, column: 20} }, kind: TokenKind::Pipe, module_id: 1.into()},
949 Token{range: Range { start: Position {line: 2, column: 20}, end: Position {line: 2, column: 21} }, kind: TokenKind::Whitespace(1), module_id: 1.into()},
950 Token{range: Range { start: Position {line: 2, column: 21}, end: Position {line: 2, column: 23} }, kind: TokenKind::Ident(SmolStr::new("or")), module_id: 1.into()},
951 Token{range: Range { start: Position {line: 2, column: 23}, end: Position {line: 2, column: 24} }, kind: TokenKind::LParen, module_id: 1.into()},
952 Token{range: Range { start: Position {line: 2, column: 24}, end: Position {line: 2, column: 25} }, kind: TokenKind::NewLine, module_id: 1.into()},
953 Token{range: Range { start: Position {line: 3, column: 1}, end: Position {line: 3, column: 9} }, kind: TokenKind::Ident(SmolStr::new("endswith")), module_id: 1.into()},
954 Token{range: Range { start: Position {line: 3, column: 9}, end: Position {line: 3, column: 10} }, kind: TokenKind::LParen, module_id: 1.into()},
955 Token{range: Range { start: Position {line: 3, column: 10}, end: Position {line: 3, column: 16} }, kind: TokenKind::StringLiteral("test".to_string()), module_id: 1.into()},
956 Token{range: Range { start: Position {line: 3, column: 16}, end: Position {line: 3, column: 17} }, kind: TokenKind::RParen, module_id: 1.into()},
957 Token{range: Range { start: Position {line: 3, column: 17}, end: Position {line: 3, column: 18} }, kind: TokenKind::RParen, module_id: 1.into()},
958 Token{range: Range { start: Position {line: 3, column: 18}, end: Position {line: 3, column: 18} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
959 #[case::tab("and(\tcontains(\"test\"))",
960 Options{include_spaces: true, ignore_errors: true},
961 Ok(vec![
962 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Ident(SmolStr::new("and")), module_id: 1.into()},
963 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::LParen, module_id: 1.into()},
964 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 6} }, kind: TokenKind::Tab(1), module_id: 1.into()},
965 Token{range: Range { start: Position {line: 1, column: 6}, end: Position {line: 1, column: 14} }, kind: TokenKind::Ident(SmolStr::new("contains")), module_id: 1.into()},
966 Token{range: Range { start: Position {line: 1, column: 14}, end: Position {line: 1, column: 15} }, kind: TokenKind::LParen, module_id: 1.into()},
967 Token{range: Range { start: Position {line: 1, column: 15}, end: Position {line: 1, column: 21} }, kind: TokenKind::StringLiteral("test".to_string()), module_id: 1.into()},
968 Token{range: Range { start: Position {line: 1, column: 21}, end: Position {line: 1, column: 22} }, kind: TokenKind::RParen, module_id: 1.into()},
969 Token{range: Range { start: Position {line: 1, column: 22}, end: Position {line: 1, column: 23} }, kind: TokenKind::RParen, module_id: 1.into()},
970 Token{range: Range { start: Position {line: 1, column: 23}, end: Position {line: 1, column: 23} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
971 #[case::interpolated_string("s\"test${val1}test\n\"",
972 Options{include_spaces: true, ignore_errors: true},
973 Ok(vec![Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 2, column: 2} },
974 kind: TokenKind::InterpolatedString(vec![
975 StringSegment::Text("test".to_string(), Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 7} }),
976 StringSegment::Expr("val1".to_string().into(), Range { start: Position {line: 1, column: 7}, end: Position {line: 1, column: 14} }),
977 StringSegment::Text("test\n".to_string(), Range { start: Position {line: 1, column: 14}, end: Position {line: 2, column: 1 }})
978 ]), module_id: 1.into()},
979 Token{range: Range { start: Position {line: 2, column: 2}, end: Position {line: 2, column: 2} }, kind: TokenKind::Eof, module_id: 1.into()}]
980 ))]
981 #[case::error("\"test",
982 Options{include_spaces: false, ignore_errors: false},
983 Err(SyntaxError::UnexpectedToken(Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 6} }, kind: TokenKind::Eof, module_id: 1.into()})))]
984 #[case::error("s\"$$${test}$$\"",
985 Options{include_spaces: false, ignore_errors: false},
986 Ok(vec![Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 15} },
987 kind: TokenKind::InterpolatedString(vec![
988 StringSegment::Text("$".to_string(), Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 5} }),
989 StringSegment::Expr("test".to_string().into(), Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 12} }),
990 StringSegment::Text("$".to_string(), Range { start: Position {line: 1, column: 12}, end: Position {line: 1, column: 14 }})
991 ]), module_id: 1.into()},
992 Token{range: Range { start: Position {line: 1, column: 15}, end: Position {line: 1, column: 15} }, kind: TokenKind::Eof, module_id: 1.into()}]
993 ))]
994 #[case::function_declaration("fn(): program;",
995 Options::default(),
996 Ok(vec![
997 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::Fn, module_id: 1.into()},
998 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::LParen, module_id: 1.into()},
999 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::RParen, module_id: 1.into()},
1000 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 6} }, kind: TokenKind::Colon, module_id: 1.into()},
1001 Token{range: Range { start: Position {line: 1, column: 7}, end: Position {line: 1, column: 14} }, kind: TokenKind::Ident(SmolStr::new("program")), module_id: 1.into()},
1002 Token{range: Range { start: Position {line: 1, column: 14}, end: Position {line: 1, column: 15} }, kind: TokenKind::SemiColon, module_id: 1.into()},
1003 Token{range: Range { start: Position {line: 1, column: 15}, end: Position {line: 1, column: 15} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1004 #[case::end_keyword("end",
1005 Options::default(),
1006 Ok(vec![
1007 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::End, module_id: 1.into()},
1008 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 4} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1009 #[case::function_declaration_with_end("fn(): program end",
1010 Options::default(),
1011 Ok(vec![
1012 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::Fn, module_id: 1.into()},
1013 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::LParen, module_id: 1.into()},
1014 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::RParen, module_id: 1.into()},
1015 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 6} }, kind: TokenKind::Colon, module_id: 1.into()},
1016 Token{range: Range { start: Position {line: 1, column: 7}, end: Position {line: 1, column: 14} }, kind: TokenKind::Ident(SmolStr::new("program")), module_id: 1.into()},
1017 Token{range: Range { start: Position {line: 1, column: 15}, end: Position {line: 1, column: 18} }, kind: TokenKind::End, module_id: 1.into()},
1018 Token{range: Range { start: Position {line: 1, column: 18}, end: Position {line: 1, column: 18} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1019 #[case::eq_eq1("==",
1020 Options::default(),
1021 Ok(vec![
1022 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::EqEq, module_id: 1.into()},
1023 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 3} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1024 #[case::eq_eq2("=",
1025 Options::default(),
1026 Ok(vec![
1027 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 2} }, kind: TokenKind::Equal, module_id: 1.into()},
1028 Token{range: Range { start: Position {line: 1, column: 2}, end: Position {line: 1, column: 2} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1029 #[case::eq_eq3("===",
1030 Options::default(),
1031 Ok(vec![
1032 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::EqEq, module_id: 1.into()},
1033 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::Equal, module_id: 1.into()},
1034 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 4} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1035 #[case::eq_eq4("== =",
1036 Options{include_spaces: true, ignore_errors: false},
1037 Ok(vec![
1038 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::EqEq, module_id: 1.into()},
1039 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::Whitespace(1), module_id: 1.into()},
1040 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::Equal, module_id: 1.into()},
1041 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 5} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1042 #[case::eq_eq5("== =",
1043 Options{include_spaces: false, ignore_errors: false}, Ok(vec![
1045 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::EqEq, module_id: 1.into()},
1046 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::Equal, module_id: 1.into()},
1047 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 5} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1048 #[case::ne_eq1("!=",
1049 Options::default(),
1050 Ok(vec![
1051 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::NeEq, module_id: 1.into()},
1052 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 3} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1053 #[case::ne_eq2("!==",
1054 Options::default(),
1055 Ok(vec![
1056 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::NeEq, module_id: 1.into()},
1057 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::Equal, module_id: 1.into()},
1058 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 4} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1059 #[case::ne_eq3("!= =",
1060 Options{include_spaces: true, ignore_errors: false},
1061 Ok(vec![
1062 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::NeEq, module_id: 1.into()},
1063 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::Whitespace(1), module_id: 1.into()},
1064 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::Equal, module_id: 1.into()},
1065 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 5} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1066 #[case::ne_eq4("!= =",
1067 Options{include_spaces: false, ignore_errors: false}, Ok(vec![
1069 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::NeEq, module_id: 1.into()},
1070 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::Equal, module_id: 1.into()},
1071 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 5} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1072 #[case("{}",
1073 Options::default(),
1074 Ok(vec![
1075 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 2} }, kind: TokenKind::LBrace, module_id: 1.into()},
1076 Token{range: Range { start: Position {line: 1, column: 2}, end: Position {line: 1, column: 3} }, kind: TokenKind::RBrace, module_id: 1.into()},
1077 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 3} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1078 #[case(" { } ",
1079 Options::default(),
1080 Ok(vec![
1081 Token{range: Range { start: Position {line: 1, column: 2}, end: Position {line: 1, column: 3} }, kind: TokenKind::LBrace, module_id: 1.into()},
1082 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::RBrace, module_id: 1.into()},
1083 Token{range: Range { start: Position {line: 1, column: 6}, end: Position {line: 1, column: 6} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1084 #[case("{key: value}", Options::default(),
1086 Ok(vec![
1087 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 2} }, kind: TokenKind::LBrace, module_id: 1.into()},
1088 Token{range: Range { start: Position {line: 1, column: 2}, end: Position {line: 1, column: 5} }, kind: TokenKind::Ident(SmolStr::new("key")), module_id: 1.into()},
1089 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 6} }, kind: TokenKind::Colon, module_id: 1.into()},
1090 Token{range: Range { start: Position {line: 1, column: 7}, end: Position {line: 1, column: 12} }, kind: TokenKind::Ident(SmolStr::new("value")), module_id: 1.into()},
1091 Token{range: Range { start: Position {line: 1, column: 12}, end: Position {line: 1, column: 13} }, kind: TokenKind::RBrace, module_id: 1.into()},
1092 Token{range: Range { start: Position {line: 1, column: 13}, end: Position {line: 1, column: 13} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1093 #[case::selector_with_dot_h_text(".h.text",
1094 Options::default(),
1095 Ok(vec![
1096 Token {
1097 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 3 } },
1098 kind: TokenKind::Selector(SmolStr::new(".h")),
1099 module_id: 1.into(),
1100 },
1101 Token {
1102 range: Range { start: Position { line: 1, column: 3 }, end: Position { line: 1, column: 8 } },
1103 kind: TokenKind::Selector(SmolStr::new(".text")),
1104 module_id: 1.into(),
1105 },
1106 Token {
1107 range: Range { start: Position { line: 1, column: 8 }, end: Position { line: 1, column: 8 } },
1108 kind: TokenKind::Eof,
1109 module_id: 1.into(),
1110 }
1111 ])
1112 )]
1113 #[case::selector_with_dot_h_level(".h.level",
1114 Options::default(),
1115 Ok(vec![
1116 Token {
1117 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 3 } },
1118 kind: TokenKind::Selector(SmolStr::new(".h")),
1119 module_id: 1.into(),
1120 },
1121 Token {
1122 range: Range { start: Position { line: 1, column: 3 }, end: Position { line: 1, column: 9 } },
1123 kind: TokenKind::Selector(SmolStr::new(".level")),
1124 module_id: 1.into(),
1125 },
1126 Token {
1127 range: Range { start: Position { line: 1, column: 9 }, end: Position { line: 1, column: 9 } },
1128 kind: TokenKind::Eof,
1129 module_id: 1.into(),
1130 }
1131 ])
1132 )]
1133 #[case::selector_blockquote_alias(".>",
1134 Options::default(),
1135 Ok(vec![
1136 Token {
1137 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 3 } },
1138 kind: TokenKind::Selector(SmolStr::new(".>")),
1139 module_id: 1.into(),
1140 },
1141 Token {
1142 range: Range { start: Position { line: 1, column: 3 }, end: Position { line: 1, column: 3 } },
1143 kind: TokenKind::Eof,
1144 module_id: 1.into(),
1145 }
1146 ])
1147 )]
1148 #[case::selector_footnote_alias(".^",
1149 Options::default(),
1150 Ok(vec![
1151 Token {
1152 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 3 } },
1153 kind: TokenKind::Selector(SmolStr::new(".^")),
1154 module_id: 1.into(),
1155 },
1156 Token {
1157 range: Range { start: Position { line: 1, column: 3 }, end: Position { line: 1, column: 3 } },
1158 kind: TokenKind::Eof,
1159 module_id: 1.into(),
1160 }
1161 ])
1162 )]
1163 #[case::selector_blockquote_in_expression("select(.>)",
1164 Options::default(),
1165 Ok(vec![
1166 Token {
1167 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 7 } },
1168 kind: TokenKind::Ident(SmolStr::new("select")),
1169 module_id: 1.into(),
1170 },
1171 Token {
1172 range: Range { start: Position { line: 1, column: 7 }, end: Position { line: 1, column: 8 } },
1173 kind: TokenKind::LParen,
1174 module_id: 1.into(),
1175 },
1176 Token {
1177 range: Range { start: Position { line: 1, column: 8 }, end: Position { line: 1, column: 10 } },
1178 kind: TokenKind::Selector(SmolStr::new(".>")),
1179 module_id: 1.into(),
1180 },
1181 Token {
1182 range: Range { start: Position { line: 1, column: 10 }, end: Position { line: 1, column: 11 } },
1183 kind: TokenKind::RParen,
1184 module_id: 1.into(),
1185 },
1186 Token {
1187 range: Range { start: Position { line: 1, column: 11 }, end: Position { line: 1, column: 11 } },
1188 kind: TokenKind::Eof,
1189 module_id: 1.into(),
1190 }
1191 ])
1192 )]
1193 #[case::hex_escape_sequence("print(\"\\x1b[2J\\x1b[H\")",
1194 Options::default(),
1195 Ok(vec![
1196 Token {
1197 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 6 } },
1198 kind: TokenKind::Ident(SmolStr::new("print")),
1199 module_id: 1.into(),
1200 },
1201 Token {
1202 range: Range { start: Position { line: 1, column: 6 }, end: Position { line: 1, column: 7 } },
1203 kind: TokenKind::LParen,
1204 module_id: 1.into(),
1205 },
1206 Token {
1207 range: Range { start: Position { line: 1, column: 7 }, end: Position { line: 1, column: 22 } },
1208 kind: TokenKind::StringLiteral("\x1b[2J\x1b[H".to_string()),
1209 module_id: 1.into(),
1210 },
1211 Token {
1212 range: Range { start: Position { line: 1, column: 22 }, end: Position { line: 1, column: 23 } },
1213 kind: TokenKind::RParen,
1214 module_id: 1.into(),
1215 },
1216 Token {
1217 range: Range { start: Position { line: 1, column: 23 }, end: Position { line: 1, column: 23 } },
1218 kind: TokenKind::Eof,
1219 module_id: 1.into(),
1220 }
1221 ])
1222 )]
1223 #[case::keyword_boundary_def("definition",
1224 Options::default(),
1225 Ok(vec![
1226 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 11} }, kind: TokenKind::Ident(SmolStr::new("definition")), module_id: 1.into()},
1227 Token{range: Range { start: Position {line: 1, column: 11}, end: Position {line: 1, column: 11} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1228 #[case::keyword_boundary_end("ending",
1229 Options::default(),
1230 Ok(vec![
1231 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 7} }, kind: TokenKind::Ident(SmolStr::new("ending")), module_id: 1.into()},
1232 Token{range: Range { start: Position {line: 1, column: 7}, end: Position {line: 1, column: 7} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1233 #[case::keyword_boundary_if("ifconfig",
1234 Options::default(),
1235 Ok(vec![
1236 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 9} }, kind: TokenKind::Ident(SmolStr::new("ifconfig")), module_id: 1.into()},
1237 Token{range: Range { start: Position {line: 1, column: 9}, end: Position {line: 1, column: 9} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1238 #[case::keyword_proper_def("def ",
1239 Options::default(),
1240 Ok(vec![
1241 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Def, module_id: 1.into()},
1242 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 5} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1243 #[case::keyword_proper_end("end ",
1244 Options::default(),
1245 Ok(vec![
1246 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::End, module_id: 1.into()},
1247 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 5} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1248 #[case::keyword_boundary_non_ascii_def("defä",
1251 Options{ignore_errors: true, include_spaces: false},
1252 Ok(vec![
1253 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Ident(SmolStr::new("def")), module_id: 1.into()},
1254 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1255 #[case::keyword_boundary_non_ascii_if("ifé",
1256 Options{ignore_errors: true, include_spaces: false},
1257 Ok(vec![
1258 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::Ident(SmolStr::new("if")), module_id: 1.into()},
1259 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1260 #[case::number_regex("\"^(-?(?:0|[1-9]\\\\d*)(?:\\\\.\\\\d+)?(?:[eE][+-]?\\\\d+)?)\"",
1261 Options::default(),
1262 Ok(vec![
1263 Token {
1264 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 53 } },
1265 kind: TokenKind::StringLiteral("^(-?(?:0|[1-9]\\d*)(?:\\.\\d+)?(?:[eE][+-]?\\d+)?)".to_string()),
1266 module_id: 1.into(),
1267 },
1268 Token {
1269 range: Range { start: Position { line: 1, column: 53 }, end: Position { line: 1, column: 53 } },
1270 kind: TokenKind::Eof,
1271 module_id: 1.into(),
1272 }
1273 ])
1274 )]
1275 #[case::regex_with_brackets("\"[a-zA-Z0-9]+\"",
1276 Options::default(),
1277 Ok(vec![
1278 Token {
1279 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 15 } },
1280 kind: TokenKind::StringLiteral("[a-zA-Z0-9]+".to_string()),
1281 module_id: 1.into(),
1282 },
1283 Token {
1284 range: Range { start: Position { line: 1, column: 15 }, end: Position { line: 1, column: 15 } },
1285 kind: TokenKind::Eof,
1286 module_id: 1.into(),
1287 }
1288 ])
1289 )]
1290 #[case::regex_with_escaped_chars("\"\\\\[\\\\(\\\\)\\\\{\\\\}\\\\+\\\\*\\\\?\\\\^\\\\$\\\\|\"",
1291 Options::default(),
1292 Ok(vec![
1293 Token {
1294 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 36 } },
1295 kind: TokenKind::StringLiteral("\\[\\(\\)\\{\\}\\+\\*\\?\\^\\$\\|".to_string()),
1296 module_id: 1.into(),
1297 },
1298 Token {
1299 range: Range { start: Position { line: 1, column: 36 }, end: Position { line: 1, column: 36 } },
1300 kind: TokenKind::Eof,
1301 module_id: 1.into(),
1302 }
1303 ])
1304 )]
1305 #[case::regex_character_classes("\"\\s\\S\\d\\D\\w\\W\"",
1306 Options::default(),
1307 Ok(vec![
1308 Token {
1309 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 15 } },
1310 kind: TokenKind::StringLiteral("sSdDwW".to_string()),
1311 module_id: 1.into(),
1312 },
1313 Token {
1314 range: Range { start: Position { line: 1, column: 15 }, end: Position { line: 1, column: 15 } },
1315 kind: TokenKind::Eof,
1316 module_id: 1.into(),
1317 }
1318 ])
1319 )]
1320 #[case::regex_mixed_with_character_classes("\"[a-z]\\d+\\s*\"",
1321 Options::default(),
1322 Ok(vec![
1323 Token {
1324 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 14 } },
1325 kind: TokenKind::StringLiteral("[a-z]d+s*".to_string()),
1326 module_id: 1.into(),
1327 },
1328 Token {
1329 range: Range { start: Position { line: 1, column: 14 }, end: Position { line: 1, column: 14 } },
1330 kind: TokenKind::Eof,
1331 module_id: 1.into(),
1332 }
1333 ])
1334 )]
1335 #[case::pipe_with_comment("| \"test\" # comment",
1336 Options::default(),
1337 Ok(vec![
1338 Token {
1339 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 2 } },
1340 kind: TokenKind::Pipe,
1341 module_id: 1.into(),
1342 },
1343 Token {
1344 range: Range { start: Position { line: 1, column: 3 }, end: Position { line: 1, column: 9 } },
1345 kind: TokenKind::StringLiteral("test".to_string()),
1346 module_id: 1.into(),
1347 },
1348 Token {
1349 range: Range { start: Position { line: 1, column: 19 }, end: Position { line: 1, column: 19 } },
1350 kind: TokenKind::Eof,
1351 module_id: 1.into(),
1352 }
1353 ])
1354 )]
1355 #[case::comment_with_pipe_character("# comment with | pipe",
1356 Options::default(),
1357 Ok(vec![
1358 Token {
1359 range: Range { start: Position { line: 1, column: 22 }, end: Position { line: 1, column: 22 } },
1360 kind: TokenKind::Eof,
1361 module_id: 1.into(),
1362 }
1363 ])
1364 )]
1365 #[case::comment_with_empty_line("#\n# test",
1366 Options::default(),
1367 Ok(vec![
1368 Token {
1369 range: Range { start: Position { line: 2, column: 7 }, end: Position { line: 2, column: 7 } },
1370 kind: TokenKind::Eof,
1371 module_id: 1.into(),
1372 }
1373 ])
1374 )]
1375 #[case::comment_hash_only("#",
1376 Options::default(),
1377 Ok(vec![
1378 Token {
1379 range: Range { start: Position { line: 1, column: 2 }, end: Position { line: 1, column: 2 } },
1380 kind: TokenKind::Eof,
1381 module_id: 1.into(),
1382 }
1383 ])
1384 )]
1385 #[case::interpolated_string_with_escaped_braces("s\"test\\{escaped\\}\"",
1386 Options{include_spaces: false, ignore_errors: false},
1387 Ok(vec![Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 19} },
1388 kind: TokenKind::InterpolatedString(vec![
1389 StringSegment::Text("test{escaped}".to_string(), Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 18} })
1390 ]), module_id: 1.into()},
1391 Token{range: Range { start: Position {line: 1, column: 19}, end: Position {line: 1, column: 19} }, kind: TokenKind::Eof, module_id: 1.into()}]
1392 ))]
1393 #[case::interpolated_string_mixed_escape_and_expr("s\"\\{${var}\\}\"",
1394 Options{include_spaces: false, ignore_errors: false},
1395 Ok(vec![Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 14} },
1396 kind: TokenKind::InterpolatedString(vec![
1397 StringSegment::Text("{".to_string(), Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 5} }),
1398 StringSegment::Expr("var".to_string().into(), Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 11} }),
1399 StringSegment::Text("}".to_string(), Range { start: Position {line: 1, column: 11}, end: Position {line: 1, column: 13} })
1400 ]), module_id: 1.into()},
1401 Token{range: Range { start: Position {line: 1, column: 14}, end: Position {line: 1, column: 14} }, kind: TokenKind::Eof, module_id: 1.into()}]
1402 ))]
1403 #[case::unicode4_hiragana("\"\\u3041\"",
1404 Options::default(),
1405 Ok(vec![
1406 Token {
1407 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 9 } },
1408 kind: TokenKind::StringLiteral("ぁ".to_string()),
1409 module_id: 1.into(),
1410 },
1411 Token {
1412 range: Range { start: Position { line: 1, column: 9 }, end: Position { line: 1, column: 9 } },
1413 kind: TokenKind::Eof,
1414 module_id: 1.into(),
1415 }
1416 ])
1417 )]
1418 #[case::unicode4_katakana("\"\\u30A1\"",
1419 Options::default(),
1420 Ok(vec![
1421 Token {
1422 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 9 } },
1423 kind: TokenKind::StringLiteral("ァ".to_string()),
1424 module_id: 1.into(),
1425 },
1426 Token {
1427 range: Range { start: Position { line: 1, column: 9 }, end: Position { line: 1, column: 9 } },
1428 kind: TokenKind::Eof,
1429 module_id: 1.into(),
1430 }
1431 ])
1432 )]
1433 #[case::unicode4_in_regex_char_class("\"[\\u3041-\\u3096]+\"",
1434 Options::default(),
1435 Ok(vec![
1436 Token {
1437 range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 19 } },
1438 kind: TokenKind::StringLiteral("[ぁ-ゖ]+".to_string()),
1439 module_id: 1.into(),
1440 },
1441 Token {
1442 range: Range { start: Position { line: 1, column: 19 }, end: Position { line: 1, column: 19 } },
1443 kind: TokenKind::Eof,
1444 module_id: 1.into(),
1445 }
1446 ])
1447 )]
1448 #[case::unterminated_string_reports_position("\"unterminated",
1449 Options::default(),
1450 Err(SyntaxError::UnexpectedToken(Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 14} }, kind: TokenKind::Eof, module_id: 1.into()})))]
1451 #[case::arrow("->",
1452 Options::default(),
1453 Ok(vec![
1454 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 3} }, kind: TokenKind::Arrow, module_id: 1.into()},
1455 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 3} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1456 #[case::arrow_in_expression("map(->(x):upcase;)",
1457 Options::default(),
1458 Ok(vec![
1459 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 4} }, kind: TokenKind::Ident(SmolStr::new("map")), module_id: 1.into()},
1460 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 5} }, kind: TokenKind::LParen, module_id: 1.into()},
1461 Token{range: Range { start: Position {line: 1, column: 5}, end: Position {line: 1, column: 7} }, kind: TokenKind::Arrow, module_id: 1.into()},
1462 Token{range: Range { start: Position {line: 1, column: 7}, end: Position {line: 1, column: 8} }, kind: TokenKind::LParen, module_id: 1.into()},
1463 Token{range: Range { start: Position {line: 1, column: 8}, end: Position {line: 1, column: 9} }, kind: TokenKind::Ident(SmolStr::new("x")), module_id: 1.into()},
1464 Token{range: Range { start: Position {line: 1, column: 9}, end: Position {line: 1, column: 10} }, kind: TokenKind::RParen, module_id: 1.into()},
1465 Token{range: Range { start: Position {line: 1, column: 10}, end: Position {line: 1, column: 11} }, kind: TokenKind::Colon, module_id: 1.into()},
1466 Token{range: Range { start: Position {line: 1, column: 11}, end: Position {line: 1, column: 17} }, kind: TokenKind::Ident(SmolStr::new("upcase")), module_id: 1.into()},
1467 Token{range: Range { start: Position {line: 1, column: 17}, end: Position {line: 1, column: 18} }, kind: TokenKind::SemiColon, module_id: 1.into()},
1468 Token{range: Range { start: Position {line: 1, column: 18}, end: Position {line: 1, column: 19} }, kind: TokenKind::RParen, module_id: 1.into()},
1469 Token{range: Range { start: Position {line: 1, column: 19}, end: Position {line: 1, column: 19} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1470 #[case::arrow_not_minus("- >",
1471 Options::default(),
1472 Ok(vec![
1473 Token{range: Range { start: Position {line: 1, column: 1}, end: Position {line: 1, column: 2} }, kind: TokenKind::Minus, module_id: 1.into()},
1474 Token{range: Range { start: Position {line: 1, column: 3}, end: Position {line: 1, column: 4} }, kind: TokenKind::Gt, module_id: 1.into()},
1475 Token{range: Range { start: Position {line: 1, column: 4}, end: Position {line: 1, column: 4} }, kind: TokenKind::Eof, module_id: 1.into()}]))]
1476
1477 fn test_parse(#[case] input: &str, #[case] options: Options, #[case] expected: Result<Vec<Token>, SyntaxError>) {
1478 assert_eq!(Lexer::new(options).tokenize(input, 1.into()), expected);
1479 }
1480
1481 #[rstest]
1482 #[case::basic(r#"b"abc""#,
1483 Options::default(),
1484 Ok(vec![
1485 Token { range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 7 } },
1486 kind: TokenKind::BytesLiteral(vec![97, 98, 99]), module_id: 1.into() },
1487 Token { range: Range { start: Position { line: 1, column: 7 }, end: Position { line: 1, column: 7 } },
1488 kind: TokenKind::Eof, module_id: 1.into() },
1489 ])
1490 )]
1491 #[case::hex_escape(r#"b"\xf0\x9f""#,
1492 Options::default(),
1493 Ok(vec![
1494 Token { range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 12 } },
1495 kind: TokenKind::BytesLiteral(vec![0xf0, 0x9f]), module_id: 1.into() },
1496 Token { range: Range { start: Position { line: 1, column: 12 }, end: Position { line: 1, column: 12 } },
1497 kind: TokenKind::Eof, module_id: 1.into() },
1498 ])
1499 )]
1500 #[case::standard_escapes(r#"b"\n\r\t\\""#,
1501 Options::default(),
1502 Ok(vec![
1503 Token { range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 12 } },
1504 kind: TokenKind::BytesLiteral(vec![b'\n', b'\r', b'\t', b'\\']), module_id: 1.into() },
1505 Token { range: Range { start: Position { line: 1, column: 12 }, end: Position { line: 1, column: 12 } },
1506 kind: TokenKind::Eof, module_id: 1.into() },
1507 ])
1508 )]
1509 #[case::empty(r#"b"""#,
1510 Options::default(),
1511 Ok(vec![
1512 Token { range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 4 } },
1513 kind: TokenKind::BytesLiteral(vec![]), module_id: 1.into() },
1514 Token { range: Range { start: Position { line: 1, column: 4 }, end: Position { line: 1, column: 4 } },
1515 kind: TokenKind::Eof, module_id: 1.into() },
1516 ])
1517 )]
1518 #[case::non_ascii_not_a_byte_literal(
1522 "b\"\u{00e9}\"",
1523 Options::default(),
1524 Ok(vec![
1525 Token { range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 2 } },
1526 kind: TokenKind::Ident(SmolStr::new("b")), module_id: 1.into() },
1527 Token { range: Range { start: Position { line: 1, column: 2 }, end: Position { line: 1, column: 5 } },
1528 kind: TokenKind::StringLiteral("\u{00e9}".to_string()), module_id: 1.into() },
1529 Token { range: Range { start: Position { line: 1, column: 5 }, end: Position { line: 1, column: 5 } },
1530 kind: TokenKind::Eof, module_id: 1.into() },
1531 ])
1532 )]
1533 #[case::b_ident_without_quote("b foo",
1534 Options::default(),
1535 Ok(vec![
1536 Token { range: Range { start: Position { line: 1, column: 1 }, end: Position { line: 1, column: 2 } },
1537 kind: TokenKind::Ident(SmolStr::new("b")), module_id: 1.into() },
1538 Token { range: Range { start: Position { line: 1, column: 3 }, end: Position { line: 1, column: 6 } },
1539 kind: TokenKind::Ident(SmolStr::new("foo")), module_id: 1.into() },
1540 Token { range: Range { start: Position { line: 1, column: 6 }, end: Position { line: 1, column: 6 } },
1541 kind: TokenKind::Eof, module_id: 1.into() },
1542 ])
1543 )]
1544 fn test_byte_string_literal(
1545 #[case] input: &str,
1546 #[case] options: Options,
1547 #[case] expected: Result<Vec<Token>, SyntaxError>,
1548 ) {
1549 assert_eq!(Lexer::new(options).tokenize(input, 1.into()), expected);
1550 }
1551}