#![forbid(unsafe_code)]
use mel_syntax::{LexDiagnostic, Lexed, Token, TokenKind, text_range};

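/// Streaming lexer over MEL source text.
///
/// Iterating yields [`Token`]s (trivia included unless the lexer was created in
/// significant-only mode) and ends with a single `Eof` token. Diagnostics
/// collected along the way are returned by [`Lexer::finish`].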
pub struct Lexer<'a> {
    input: &'a str,
    bytes: &'a [u8],
    offset: usize,
    emitted_eof: bool,
    diagnostics: Vec<LexDiagnostic>,
    significant_only: bool,
    reject_block_comments: bool,
}

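/// Crate-internal lexing policy. Currently it only controls whether block
/// comments are reported as errors (used for expression-mode input).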
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct LexerPolicy {
    pub(crate) reject_block_comments: bool,
}

impl<'a> Lexer<'a> {
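    /// Creates a lexer that yields every token, including whitespace and comments.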
    #[must_use]
    pub fn new(input: &'a str) -> Self {
        Self::with_options(input, false)
    }

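    /// Creates a lexer that skips trivia and yields only significant tokens.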
    #[must_use]
    pub fn significant(input: &'a str) -> Self {
        Self::with_options(input, true)
    }

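    /// Creates a trivia-skipping lexer with an explicit [`LexerPolicy`].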
    #[must_use]
    pub(crate) fn significant_with_policy(input: &'a str, policy: LexerPolicy) -> Self {
        Self::with_options_and_policy(input, true, policy)
    }

    fn with_options(input: &'a str, significant_only: bool) -> Self {
        Self::with_options_and_policy(input, significant_only, LexerPolicy::default())
    }

    fn with_options_and_policy(
        input: &'a str,
        significant_only: bool,
        policy: LexerPolicy,
    ) -> Self {
        Self {
            input,
            bytes: input.as_bytes(),
            offset: 0,
            emitted_eof: false,
            diagnostics: Vec::new(),
            significant_only,
            reject_block_comments: policy.reject_block_comments,
        }
    }

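    /// Consumes the lexer and returns the diagnostics collected while lexing.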
    #[must_use]
    pub fn finish(self) -> Vec<LexDiagnostic> {
        self.diagnostics
    }

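    /// Produces the next token. A single `Eof` token is emitted at the end of
    /// the input, after which `None` is returned; in significant-only mode
    /// trivia tokens are skipped rather than returned.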
    fn next_token_internal(&mut self) -> Option<Token> {
        if self.emitted_eof {
            return None;
        }

        // Loop so that trivia tokens can be skipped in significant-only mode.
        loop {
            if self.offset >= self.bytes.len() {
                self.emitted_eof = true;
                let eof = self.input.len() as u32;
                return Some(Token::new(TokenKind::Eof, text_range(eof, eof)));
            }

            let bytes = self.bytes;
            let mut i = self.offset;
            let token = match bytes[i] {
                b' ' | b'\t' | b'\r' | b'\n' => {
                    let start = i;
                    i = lex_whitespace(bytes, i);
                    Token::new(TokenKind::Whitespace, text_range(start as u32, i as u32))
                }
                b'/' if matches!(bytes.get(i + 1), Some(b'/')) => {
                    let start = i;
                    i = lex_line_comment(bytes, i);
                    Token::new(TokenKind::LineComment, text_range(start as u32, i as u32))
                }
                b'/' if matches!(bytes.get(i + 1), Some(b'*')) => {
                    let start = i;
                    let (end, terminated) = lex_block_comment(bytes, i);
                    i = end;
                    if self.reject_block_comments {
                        self.diagnostics.push(LexDiagnostic::new(
                            "block comments are not allowed in expression mode",
                            text_range(start as u32, end as u32),
                        ));
                    }
                    if !terminated {
                        self.diagnostics.push(LexDiagnostic::new(
                            "unterminated block comment",
                            text_range(start as u32, end as u32),
                        ));
                    }
                    Token::new(
                        TokenKind::BlockComment,
                        text_range(start as u32, end as u32),
                    )
                }
                b';' => advance_token(TokenKind::Semi, i, i + 1, &mut i),
                b'(' => advance_token(TokenKind::LParen, i, i + 1, &mut i),
                b')' => advance_token(TokenKind::RParen, i, i + 1, &mut i),
                b'[' => advance_token(TokenKind::LBracket, i, i + 1, &mut i),
                b']' => advance_token(TokenKind::RBracket, i, i + 1, &mut i),
                b'{' => advance_token(TokenKind::LBrace, i, i + 1, &mut i),
                b'}' => advance_token(TokenKind::RBrace, i, i + 1, &mut i),
                // A dot directly followed by a digit starts a leading-dot float such as `.5`.
                b'.' if bytes
                    .get(i + 1)
                    .copied()
                    .is_some_and(|b| b.is_ascii_digit()) =>
                {
                    let start = i;
                    i += 1;
                    while bytes.get(i).copied().is_some_and(|b| b.is_ascii_digit()) {
                        i += 1;
                    }

                    if let Some(end) = lex_exponent_suffix(bytes, i) {
                        i = end;
                    }

                    Token::new(TokenKind::FloatLiteral, text_range(start as u32, i as u32))
                }
                b'.' => advance_token(TokenKind::Dot, i, i + 1, &mut i),
                b',' => advance_token(TokenKind::Comma, i, i + 1, &mut i),
                b'$' => advance_token(TokenKind::Dollar, i, i + 1, &mut i),
                b'`' => advance_token(TokenKind::Backquote, i, i + 1, &mut i),
                b'?' => advance_token(TokenKind::Question, i, i + 1, &mut i),
                b':' => advance_token(TokenKind::Colon, i, i + 1, &mut i),
                b'+' if matches!(bytes.get(i + 1), Some(b'=')) => {
                    advance_token(TokenKind::PlusEq, i, i + 2, &mut i)
                }
                b'+' if matches!(bytes.get(i + 1), Some(b'+')) => {
                    advance_token(TokenKind::PlusPlus, i, i + 2, &mut i)
                }
                b'+' => advance_token(TokenKind::Plus, i, i + 1, &mut i),
                b'*' if matches!(bytes.get(i + 1), Some(b'=')) => {
                    advance_token(TokenKind::StarEq, i, i + 2, &mut i)
                }
                b'*' => advance_token(TokenKind::Star, i, i + 1, &mut i),
                b'/' if matches!(bytes.get(i + 1), Some(b'=')) => {
                    advance_token(TokenKind::SlashEq, i, i + 2, &mut i)
                }
                b'/' => advance_token(TokenKind::Slash, i, i + 1, &mut i),
                b'%' => advance_token(TokenKind::Percent, i, i + 1, &mut i),
                b'^' => advance_token(TokenKind::Caret, i, i + 1, &mut i),
                b'!' if matches!(bytes.get(i + 1), Some(b'=')) => {
                    advance_token(TokenKind::NotEq, i, i + 2, &mut i)
                }
                b'!' => advance_token(TokenKind::Bang, i, i + 1, &mut i),
                b'=' if matches!(bytes.get(i + 1), Some(b'=')) => {
                    advance_token(TokenKind::EqEq, i, i + 2, &mut i)
                }
                b'=' => advance_token(TokenKind::Assign, i, i + 1, &mut i),
                b'<' if matches!(bytes.get(i + 1), Some(b'<')) => {
                    advance_token(TokenKind::LtLt, i, i + 2, &mut i)
                }
                b'<' if matches!(bytes.get(i + 1), Some(b'=')) => {
                    advance_token(TokenKind::Le, i, i + 2, &mut i)
                }
                b'<' => advance_token(TokenKind::Lt, i, i + 1, &mut i),
                b'>' if matches!(bytes.get(i + 1), Some(b'>')) => {
                    advance_token(TokenKind::GtGt, i, i + 2, &mut i)
                }
                b'>' if matches!(bytes.get(i + 1), Some(b'=')) => {
                    advance_token(TokenKind::Ge, i, i + 2, &mut i)
                }
                b'>' => advance_token(TokenKind::Gt, i, i + 1, &mut i),
                b'&' if matches!(bytes.get(i + 1), Some(b'&')) => {
                    advance_token(TokenKind::AndAnd, i, i + 2, &mut i)
                }
                b'|' if matches!(bytes.get(i + 1), Some(b'|')) => {
                    advance_token(TokenKind::OrOr, i, i + 2, &mut i)
                }
                b'|' => advance_token(TokenKind::Pipe, i, i + 1, &mut i),
                b'-' if matches!(bytes.get(i + 1), Some(b'-')) => {
                    advance_token(TokenKind::MinusMinus, i, i + 2, &mut i)
                }
                b'-' if matches!(bytes.get(i + 1), Some(b'=')) => {
                    advance_token(TokenKind::MinusEq, i, i + 2, &mut i)
                }
                // `-ident` after whitespace is a command flag (e.g. `optionVar -q`);
                // otherwise the `-` falls through to the plain Minus arm below.
                b'-' if bytes.get(i + 1).copied().is_some_and(is_ident_start_byte)
                    && can_start_flag(bytes, i) =>
                {
                    let start = i;
                    i += 1;
                    while bytes.get(i).copied().is_some_and(is_ident_continue_byte) {
                        i += 1;
                    }
                    Token::new(TokenKind::Flag, text_range(start as u32, i as u32))
                }
                b'-' => advance_token(TokenKind::Minus, i, i + 1, &mut i),
                b'"' => {
                    let start = i;
                    i += 1;
                    let mut terminated = false;
                    while i < bytes.len() {
                        match bytes[i] {
                            // Skip the byte after a backslash so escaped quotes
                            // do not terminate the literal.
                            b'\\' => {
                                i += if i + 1 < bytes.len() { 2 } else { 1 };
                            }
                            b'"' => {
                                i += 1;
                                terminated = true;
                                break;
                            }
                            _ => i += 1,
                        }
                    }
                    if !terminated {
                        self.diagnostics.push(LexDiagnostic::new(
                            "unterminated string literal",
                            text_range(start as u32, i as u32),
                        ));
                    }
                    Token::new(TokenKind::StringLiteral, text_range(start as u32, i as u32))
                }
                b'0'..=b'9' => {
                    let start = i;

                    // Hexadecimal integer: `0x`/`0X` followed by at least one hex digit.
                    if bytes[i] == b'0'
                        && matches!(bytes.get(i + 1), Some(b'x' | b'X'))
                        && bytes
                            .get(i + 2)
                            .copied()
                            .is_some_and(|b| b.is_ascii_hexdigit())
                    {
                        i += 2;
                        while bytes.get(i).copied().is_some_and(|b| b.is_ascii_hexdigit()) {
                            i += 1;
                        }
                        self.offset = i;
                        let token =
                            Token::new(TokenKind::IntLiteral, text_range(start as u32, i as u32));
                        if self.significant_only && token.kind.is_trivia() {
                            continue;
                        }
                        return Some(token);
                    }

                    i += 1;
                    while bytes.get(i).copied().is_some_and(|b| b.is_ascii_digit()) {
                        i += 1;
                    }

                    let mut kind = TokenKind::IntLiteral;

                    if matches!(bytes.get(i), Some(b'.')) {
                        if bytes
                            .get(i + 1)
                            .copied()
                            .is_some_and(|b| b.is_ascii_digit())
                        {
                            i += 1;
                            while bytes.get(i).copied().is_some_and(|b| b.is_ascii_digit()) {
                                i += 1;
                            }
                            kind = TokenKind::FloatLiteral;
                        } else if can_end_with_trailing_dot_float(bytes, i + 1) {
                            // Trailing-dot float such as `1000.`.
                            i += 1;
                            kind = TokenKind::FloatLiteral;
                        }
                    }

                    if let Some(end) = lex_exponent_suffix(bytes, i) {
                        i = end;
                        kind = TokenKind::FloatLiteral;
                    }

                    Token::new(kind, text_range(start as u32, i as u32))
                }
                b if is_ident_start_byte(b) => {
                    let start = i;
                    i += 1;
                    while bytes.get(i).copied().is_some_and(is_ident_continue_byte) {
                        i += 1;
                    }
                    Token::new(TokenKind::Ident, text_range(start as u32, i as u32))
                }
                _ => {
                    // Consume one full UTF-8 code point so multi-byte characters
                    // produce a single Unknown token.
                    let start = i;
                    let end = next_codepoint_boundary(self.input, i);
                    self.diagnostics.push(LexDiagnostic::new(
                        "unknown character",
                        text_range(start as u32, end as u32),
                    ));
                    i = end;
                    Token::new(TokenKind::Unknown, text_range(start as u32, end as u32))
                }
            };

            self.offset = i;
            if self.significant_only && token.kind.is_trivia() {
                continue;
            }
            return Some(token);
        }
    }
}

impl Iterator for Lexer<'_> {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        self.next_token_internal()
    }
}

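/// Returns a lexer over `input` that yields every token, trivia included.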
#[must_use]
pub fn lexer(input: &str) -> Lexer<'_> {
    Lexer::new(input)
}

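/// Returns a lexer over `input` that skips trivia tokens.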
#[must_use]
pub fn significant_lexer(input: &str) -> Lexer<'_> {
    Lexer::significant(input)
}

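/// Lexes `input` to completion, returning all tokens and any diagnostics.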
#[must_use]
pub fn lex(input: &str) -> Lexed {
    let mut lexer = lexer(input);
    let tokens = lexer.by_ref().collect();
    let diagnostics = lexer.finish();
    Lexed {
        tokens,
        diagnostics,
    }
}

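/// Like [`lex`], but the returned token list excludes trivia.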
#[must_use]
pub fn lex_significant(input: &str) -> Lexed {
    let mut lexer = significant_lexer(input);
    let tokens = lexer.by_ref().collect();
    let diagnostics = lexer.finish();
    Lexed {
        tokens,
        diagnostics,
    }
}

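/// Advances the cursor to `end` and builds a token spanning `start..end`.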
fn advance_token(kind: TokenKind, start: usize, end: usize, index: &mut usize) -> Token {
    *index = end;
    Token::new(kind, text_range(start as u32, end as u32))
}

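/// Returns the byte offset just past the UTF-8 code point beginning at `start`.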
fn next_codepoint_boundary(input: &str, start: usize) -> usize {
    debug_assert!(input.is_char_boundary(start));
    input[start..]
        .chars()
        .next()
        .map_or(input.len(), |ch| start + ch.len_utf8())
}

fn lex_whitespace(bytes: &[u8], start: usize) -> usize {
    let mut i = start;
    while matches!(bytes.get(i), Some(b' ' | b'\t' | b'\r' | b'\n')) {
        i += 1;
    }
    i
}

fn lex_line_comment(bytes: &[u8], start: usize) -> usize {
    let mut i = start + 2;
    while let Some(byte) = bytes.get(i) {
        if *byte == b'\n' {
            break;
        }
        i += 1;
    }
    i
}

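/// Scans a block comment opened at `start`; returns the end offset and whether
/// the closing `*/` was found.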
fn lex_block_comment(bytes: &[u8], start: usize) -> (usize, bool) {
    let mut i = start + 2;
    while i + 1 < bytes.len() {
        if bytes[i] == b'*' && bytes[i + 1] == b'/' {
            return (i + 2, true);
        }
        i += 1;
    }
    (bytes.len(), false)
}

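/// A `-` followed by an identifier is lexed as a flag only when it is preceded
/// by whitespace, so expressions like `size($a)-size($b)` keep their minus.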
fn can_start_flag(bytes: &[u8], index: usize) -> bool {
    index > 0 && bytes[index - 1].is_ascii_whitespace()
}

fn is_ident_start_byte(byte: u8) -> bool {
    byte.is_ascii_alphabetic() || byte == b'_'
}

fn is_ident_continue_byte(byte: u8) -> bool {
    is_ident_start_byte(byte) || byte.is_ascii_digit()
}

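/// Accepts a trailing-dot float such as `1000.` only when the dot is followed
/// by end of input, whitespace, or a delimiter/operator byte.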
fn can_end_with_trailing_dot_float(bytes: &[u8], index: usize) -> bool {
    match bytes.get(index).copied() {
        None => true,
        Some(byte) if byte.is_ascii_whitespace() => true,
        Some(
            b';' | b',' | b')' | b']' | b'}' | b'?' | b':' | b'+' | b'-' | b'*' | b'/' | b'%'
            | b'=' | b'!' | b'<' | b'>' | b'&' | b'|',
        ) => true,
        _ => false,
    }
}

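/// Returns the offset past a well-formed exponent suffix (`e`/`E`, optional
/// sign, at least one digit) starting at `start`, or `None` if absent.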
fn lex_exponent_suffix(bytes: &[u8], start: usize) -> Option<usize> {
    let exponent = bytes.get(start).copied()?;
    if !matches!(exponent, b'e' | b'E') {
        return None;
    }

    let mut index = start + 1;
    if matches!(bytes.get(index), Some(b'+' | b'-')) {
        index += 1;
    }

    let first_digit = bytes.get(index).copied()?;
    if !first_digit.is_ascii_digit() {
        return None;
    }

    index += 1;
    while bytes
        .get(index)
        .copied()
        .is_some_and(|byte| byte.is_ascii_digit())
    {
        index += 1;
    }

    Some(index)
}

#[cfg(test)]
mod tests {
    use super::lex;
    use mel_syntax::{TokenKind, range_end, range_start, text_range};

    fn token_kinds(input: &str) -> Vec<TokenKind> {
        lex(input)
            .tokens
            .into_iter()
            .map(|token| token.kind)
            .collect()
    }

    #[test]
    fn lexes_basic_statement() {
        let kinds = token_kinds(r#"$foo = 1;"#);
        assert_eq!(
            kinds,
            vec![
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::Assign,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::Semi,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lexes_compound_assignment_and_updates() {
        let kinds = token_kinds(r#"$foo += 1; $bar -= 2; $baz *= 3; $qux /= 4; $foo++; $foo--;"#);
        assert_eq!(
            kinds,
            vec![
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::PlusEq,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::Semi,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::MinusEq,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::Semi,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::StarEq,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::Semi,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::SlashEq,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::Semi,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::PlusPlus,
                TokenKind::Semi,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::MinusMinus,
                TokenKind::Semi,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lexes_backquoted_command() {
        let kinds = token_kinds(r#"`ls -sl`;"#);
        assert_eq!(
            kinds,
            vec![
                TokenKind::Backquote,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::Flag,
                TokenKind::Backquote,
                TokenKind::Semi,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lexes_minus_before_ident_in_expression_as_minus() {
        let kinds = token_kinds(r#"size($path)-size($sceneName);"#);
        assert_eq!(
            kinds,
            vec![
                TokenKind::Ident,
                TokenKind::LParen,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::RParen,
                TokenKind::Minus,
                TokenKind::Ident,
                TokenKind::LParen,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::RParen,
                TokenKind::Semi,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn keeps_minus_ident_after_whitespace_as_flag() {
        let kinds = token_kinds("optionVar -q Foo;");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::Flag,
                TokenKind::Whitespace,
                TokenKind::Ident,
                TokenKind::Semi,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lexes_exponent_float_literals() {
        let input = "1.0e-3 1e+3 0.0e0 1E-9";
        let lexed = lex(input);
        let texts: Vec<_> = lexed
            .tokens
            .iter()
            .filter(|token| !token.kind.is_trivia() && token.kind != TokenKind::Eof)
            .map(|token| {
                (
                    &input[range_start(token.range) as usize..range_end(token.range) as usize],
                    token.kind,
                )
            })
            .collect();

        assert_eq!(
            texts,
            vec![
                ("1.0e-3", TokenKind::FloatLiteral),
                ("1e+3", TokenKind::FloatLiteral),
                ("0.0e0", TokenKind::FloatLiteral),
                ("1E-9", TokenKind::FloatLiteral),
            ]
        );
    }

    #[test]
    fn lexes_leading_dot_float_literals() {
        let input = ".7 .001 .5e+2 .";
        let lexed = lex(input);
        let texts: Vec<_> = lexed
            .tokens
            .iter()
            .filter(|token| !token.kind.is_trivia() && token.kind != TokenKind::Eof)
            .map(|token| {
                (
                    &input[range_start(token.range) as usize..range_end(token.range) as usize],
                    token.kind,
                )
            })
            .collect();

        assert_eq!(
            texts,
            vec![
                (".7", TokenKind::FloatLiteral),
                (".001", TokenKind::FloatLiteral),
                (".5e+2", TokenKind::FloatLiteral),
                (".", TokenKind::Dot),
            ]
        );
    }

    #[test]
    fn lexes_trailing_dot_float_literals() {
        let input = "1000. 0. -1000. 1.. 1.foo";
        let lexed = lex(input);
        let texts: Vec<_> = lexed
            .tokens
            .iter()
            .filter(|token| !token.kind.is_trivia() && token.kind != TokenKind::Eof)
            .map(|token| {
                (
                    &input[range_start(token.range) as usize..range_end(token.range) as usize],
                    token.kind,
                )
            })
            .collect();

        assert_eq!(
            texts,
            vec![
                ("1000.", TokenKind::FloatLiteral),
                ("0.", TokenKind::FloatLiteral),
                ("-", TokenKind::Minus),
                ("1000.", TokenKind::FloatLiteral),
                ("1", TokenKind::IntLiteral),
                (".", TokenKind::Dot),
                (".", TokenKind::Dot),
                ("1", TokenKind::IntLiteral),
                (".", TokenKind::Dot),
                ("foo", TokenKind::Ident),
            ]
        );
    }

    #[test]
    fn lexes_hex_integer_literals() {
        let input = "0x8000 0X0001 42";
        let lexed = lex(input);
        let texts: Vec<_> = lexed
            .tokens
            .iter()
            .filter(|token| !token.kind.is_trivia() && token.kind != TokenKind::Eof)
            .map(|token| {
                (
                    &input[range_start(token.range) as usize..range_end(token.range) as usize],
                    token.kind,
                )
            })
            .collect();

        assert_eq!(
            texts,
            vec![
                ("0x8000", TokenKind::IntLiteral),
                ("0X0001", TokenKind::IntLiteral),
                ("42", TokenKind::IntLiteral),
            ]
        );
    }

    #[test]
    fn lexes_caret_operator() {
        let kinds = token_kinds("vector $cross = $a ^ $b;");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::Assign,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::Caret,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Semi,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn malformed_exponent_suffix_stays_split() {
        let kinds = token_kinds("1e+ 1.0e 1e-");
        assert_eq!(
            kinds,
            vec![
                TokenKind::IntLiteral,
                TokenKind::Ident,
                TokenKind::Plus,
                TokenKind::Whitespace,
                TokenKind::FloatLiteral,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::Ident,
                TokenKind::Minus,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lexes_vector_literals_and_components() {
        let kinds = token_kinds(r#"$dir = <<1, 2, 3>>; $x = $dir.x;"#);
        assert_eq!(
            kinds,
            vec![
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::Assign,
                TokenKind::Whitespace,
                TokenKind::LtLt,
                TokenKind::IntLiteral,
                TokenKind::Comma,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::Comma,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::GtGt,
                TokenKind::Semi,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::Assign,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Dot,
                TokenKind::Ident,
                TokenKind::Semi,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lexes_single_pipe_for_dag_paths() {
        let kinds = token_kinds("|pSphere1|pSphereShape1.instObjGroups[0]");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Pipe,
                TokenKind::Ident,
                TokenKind::Pipe,
                TokenKind::Ident,
                TokenKind::Dot,
                TokenKind::Ident,
                TokenKind::LBracket,
                TokenKind::IntLiteral,
                TokenKind::RBracket,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn keeps_double_pipe_as_boolean_or() {
        let kinds = token_kinds("$a || $b");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::OrOr,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn retains_trivia_tokens() {
        let kinds = token_kinds("// lead\n$foo /* mid */ = 1;");
        assert_eq!(
            kinds,
            vec![
                TokenKind::LineComment,
                TokenKind::Whitespace,
                TokenKind::Dollar,
                TokenKind::Ident,
                TokenKind::Whitespace,
                TokenKind::BlockComment,
                TokenKind::Whitespace,
                TokenKind::Assign,
                TokenKind::Whitespace,
                TokenKind::IntLiteral,
                TokenKind::Semi,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn unterminated_string_produces_diagnostic() {
        let lexed = lex("\"unterminated");
        assert_eq!(lexed.tokens.len(), 2);
        assert_eq!(lexed.tokens[0].kind, TokenKind::StringLiteral);
        assert_eq!(lexed.tokens[0].range, text_range(0, 13));
        assert_eq!(lexed.tokens[1].kind, TokenKind::Eof);
        assert_eq!(lexed.tokens[1].range, text_range(13, 13));
        assert_eq!(lexed.diagnostics.len(), 1);
        assert_eq!(lexed.diagnostics[0].message, "unterminated string literal");
        assert_eq!(lexed.diagnostics[0].range, text_range(0, 13));
    }

    #[test]
    fn unterminated_block_comment_produces_diagnostic() {
        let lexed = lex("/* unterminated");
        assert_eq!(lexed.tokens.len(), 2);
        assert_eq!(lexed.tokens[0].kind, TokenKind::BlockComment);
        assert_eq!(lexed.tokens[0].range, text_range(0, 15));
        assert_eq!(lexed.tokens[1].kind, TokenKind::Eof);
        assert_eq!(lexed.tokens[1].range, text_range(15, 15));
        assert_eq!(lexed.diagnostics.len(), 1);
        assert_eq!(lexed.diagnostics[0].message, "unterminated block comment");
        assert_eq!(lexed.diagnostics[0].range, text_range(0, 15));
    }

    #[test]
    fn unknown_character_produces_token_and_diagnostic() {
        let lexed = lex("@");
        assert_eq!(lexed.tokens.len(), 2);
        assert_eq!(lexed.tokens[0].kind, TokenKind::Unknown);
        assert_eq!(lexed.tokens[0].range, text_range(0, 1));
        assert_eq!(lexed.diagnostics.len(), 1);
        assert_eq!(lexed.diagnostics[0].message, "unknown character");
        assert_eq!(lexed.diagnostics[0].range, text_range(0, 1));
    }

    #[test]
    fn unknown_utf8_codepoint_produces_single_token_and_diagnostic() {
        let lexed = lex("😀");
        assert_eq!(lexed.tokens.len(), 2);
        assert_eq!(lexed.tokens[0].kind, TokenKind::Unknown);
        assert_eq!(lexed.tokens[0].range, text_range(0, 4));
        assert_eq!(lexed.tokens[1].kind, TokenKind::Eof);
        assert_eq!(lexed.tokens[1].range, text_range(4, 4));
        assert_eq!(lexed.diagnostics.len(), 1);
        assert_eq!(lexed.diagnostics[0].message, "unknown character");
        assert_eq!(lexed.diagnostics[0].range, text_range(0, 4));
    }
}