1use std::num::IntErrorKind;
19
20use crate::errors::QalaError;
21use crate::span::Span;
22use crate::token::{Token, TokenKind};
23
/// Lexing context. The lexer keeps a stack of these and consults the top entry
/// to decide how the next bytes are scanned.
enum Mode {
    /// Ordinary code outside any string literal.
    Normal,
    /// Inside the text of a string literal.
    StrText {
        /// Byte offset of the opening `"`.
        open_quote: usize,
        /// Set once this literal has produced an interpolation.
        saw_interp: bool,
    },
    /// Inside a `{ ... }` interpolation within a string literal.
    Interp {
        /// Number of `{` opened inside the interpolation and not yet closed.
        brace_depth: u32,
        /// Byte offset of the `{` that opened the interpolation.
        open_brace: usize,
    },
}
46
47pub struct Lexer<'src> {
50 src: &'src str,
51 bytes: &'src [u8],
52 pos: usize,
53 mode_stack: Vec<Mode>,
55}
56
57impl<'src> Lexer<'src> {
58 pub fn tokenize(src: &'src str) -> Result<Vec<Token>, QalaError> {
65 let mut lx = Lexer {
66 src,
67 bytes: src.as_bytes(),
68 pos: 0,
69 mode_stack: vec![Mode::Normal],
70 };
71 lx.skip_bom();
72 let mut out = Vec::new();
73 loop {
74 if matches!(lx.mode_stack.last(), Some(Mode::StrText { .. })) {
79 lx.scan_string_body(&mut out)?;
80 continue;
81 }
82 lx.skip_trivia();
83 let start = lx.pos;
84 match lx.peek() {
85 None => {
86 if let Some(err) = lx.unterminated_at_eof() {
89 return Err(err);
90 }
91 out.push(Token::new(TokenKind::Eof, Span::new(start, 0)));
92 break;
93 }
94 Some(b'"') => {
99 lx.pos += 1; lx.mode_stack.push(Mode::StrText {
101 open_quote: start,
102 saw_interp: false,
103 });
104 }
105 Some(b) => {
106 let kind = lx.scan_token(b, start)?;
107 out.push(Token::new(kind, Span::new(start, lx.pos - start)));
108 }
109 }
110 }
111 Ok(out)
112 }
113
114 fn peek(&self) -> Option<u8> {
118 self.bytes.get(self.pos).copied()
119 }
120
121 fn peek2(&self) -> Option<u8> {
123 self.bytes.get(self.pos + 1).copied()
124 }
125
126 fn bump(&mut self) -> Option<u8> {
128 let b = self.peek();
129 if b.is_some() {
130 self.pos += 1;
131 }
132 b
133 }
134
135 fn skip_bom(&mut self) {
140 if self.src.starts_with('\u{FEFF}') {
141 self.pos += '\u{FEFF}'.len_utf8();
142 }
143 }
144
145 fn skip_trivia(&mut self) {
149 loop {
150 match self.peek() {
151 Some(b) if b.is_ascii_whitespace() => {
152 self.pos += 1;
153 }
154 Some(b'/') if self.peek2() == Some(b'/') => {
155 self.pos += 2;
157 while let Some(b) = self.peek() {
158 if b == b'\n' {
159 break;
160 }
161 self.pos += 1;
162 }
163 }
164 _ => break,
165 }
166 }
167 }
168
169 fn scan_token(&mut self, b: u8, start: usize) -> Result<TokenKind, QalaError> {
177 if b == b'_' || b.is_ascii_alphabetic() {
179 if b == b'b' && self.peek2() == Some(b'\'') {
183 return self.scan_byte_literal(start);
184 }
185 return Ok(self.scan_identifier());
186 }
187 if b.is_ascii_digit() {
188 return self.scan_number(start);
189 }
190 if (b == b'{' || b == b'}') && matches!(self.mode_stack.last(), Some(Mode::Interp { .. })) {
192 return Ok(self.scan_brace_in_interp(b));
193 }
194 match b {
195 b'(' => {
196 self.pos += 1;
197 Ok(TokenKind::LParen)
198 }
199 b')' => {
200 self.pos += 1;
201 Ok(TokenKind::RParen)
202 }
203 b'[' => {
204 self.pos += 1;
205 Ok(TokenKind::LBracket)
206 }
207 b']' => {
208 self.pos += 1;
209 Ok(TokenKind::RBracket)
210 }
211 b'{' => {
212 self.pos += 1;
213 Ok(TokenKind::LBrace)
214 }
215 b'}' => {
216 self.pos += 1;
217 Ok(TokenKind::RBrace)
218 }
219 b',' => {
220 self.pos += 1;
221 Ok(TokenKind::Comma)
222 }
223 b':' => {
224 self.pos += 1;
225 Ok(TokenKind::Colon)
226 }
227 b';' => {
228 self.pos += 1;
229 Ok(TokenKind::Semi)
230 }
231 b'+' => {
232 self.pos += 1;
233 Ok(TokenKind::Plus)
234 }
235 b'*' => {
236 self.pos += 1;
237 Ok(TokenKind::Star)
238 }
239 b'%' => {
240 self.pos += 1;
241 Ok(TokenKind::Percent)
242 }
243 b'?' => {
244 self.pos += 1;
245 Ok(TokenKind::Question)
246 }
247 b'/' => {
249 self.pos += 1;
250 Ok(TokenKind::Slash)
251 }
252 b'.' => {
255 self.pos += 1;
256 if self.peek() == Some(b'.') {
257 self.pos += 1;
258 if self.peek() == Some(b'=') {
259 self.pos += 1;
260 Ok(TokenKind::DotDotEq)
261 } else {
262 Ok(TokenKind::DotDot)
263 }
264 } else {
265 Ok(TokenKind::Dot)
266 }
267 }
268 b'-' => {
269 self.pos += 1;
270 if self.peek() == Some(b'>') {
271 self.pos += 1;
272 Ok(TokenKind::Arrow)
273 } else {
274 Ok(TokenKind::Minus)
275 }
276 }
277 b'=' => {
278 self.pos += 1;
279 match self.peek() {
280 Some(b'=') => {
281 self.pos += 1;
282 Ok(TokenKind::EqEq)
283 }
284 Some(b'>') => {
285 self.pos += 1;
286 Ok(TokenKind::FatArrow)
287 }
288 _ => Ok(TokenKind::Eq),
289 }
290 }
291 b'!' => {
292 self.pos += 1;
293 if self.peek() == Some(b'=') {
294 self.pos += 1;
295 Ok(TokenKind::BangEq)
296 } else {
297 Ok(TokenKind::Bang)
298 }
299 }
300 b'<' => {
301 self.pos += 1;
302 if self.peek() == Some(b'=') {
303 self.pos += 1;
304 Ok(TokenKind::LtEq)
305 } else {
306 Ok(TokenKind::Lt)
307 }
308 }
309 b'>' => {
310 self.pos += 1;
311 if self.peek() == Some(b'=') {
312 self.pos += 1;
313 Ok(TokenKind::GtEq)
314 } else {
315 Ok(TokenKind::Gt)
316 }
317 }
318 b'&' => {
321 if self.peek2() == Some(b'&') {
322 self.pos += 2;
323 Ok(TokenKind::AmpAmp)
324 } else {
325 let span = Span::new(start, 1);
326 self.pos += 1;
327 Err(QalaError::UnexpectedChar { span, ch: '&' })
328 }
329 }
330 b'|' => match self.peek2() {
333 Some(b'|') => {
334 self.pos += 2;
335 Ok(TokenKind::PipePipe)
336 }
337 Some(b'>') => {
338 self.pos += 2;
339 Ok(TokenKind::PipeGt)
340 }
341 _ => {
342 let span = Span::new(start, 1);
343 self.pos += 1;
344 Err(QalaError::UnexpectedChar { span, ch: '|' })
345 }
346 },
347 _ if b >= 0x80 => {
351 let ch = self.src[self.pos..].chars().next().unwrap_or('\u{FFFD}');
352 let len = ch.len_utf8();
353 let span = Span::new(start, len);
354 self.pos += len;
355 Err(QalaError::UnexpectedChar { span, ch })
356 }
357 _ => {
360 let span = Span::new(start, 1);
361 self.pos += 1;
362 Err(QalaError::UnexpectedChar {
363 span,
364 ch: b as char,
365 })
366 }
367 }
368 }
369
370 fn scan_identifier(&mut self) -> TokenKind {
373 let start = self.pos;
374 while let Some(b) = self.peek() {
375 if b == b'_' || b.is_ascii_alphanumeric() {
376 self.pos += 1;
377 } else {
378 break;
379 }
380 }
381 let text = &self.src[start..self.pos];
382 match crate::token::keyword(text) {
383 Some(kw) => kw,
384 None => TokenKind::Ident(text.to_string()),
385 }
386 }
387
388 fn scan_brace_in_interp(&mut self, b: u8) -> TokenKind {
393 self.pos += 1; match b {
395 b'{' => {
396 if let Some(Mode::Interp { brace_depth, .. }) = self.mode_stack.last_mut() {
397 *brace_depth += 1;
398 }
399 TokenKind::LBrace
400 }
401 _ => {
403 let at_depth_zero = matches!(
404 self.mode_stack.last(),
405 Some(Mode::Interp { brace_depth: 0, .. })
406 );
407 if at_depth_zero {
408 self.mode_stack.pop(); TokenKind::InterpEnd
410 } else {
411 if let Some(Mode::Interp { brace_depth, .. }) = self.mode_stack.last_mut() {
412 *brace_depth -= 1;
414 }
415 TokenKind::RBrace
416 }
417 }
418 }
419 }
420
421 fn scan_number(&mut self, start: usize) -> Result<TokenKind, QalaError> {
436 if self.peek() == Some(b'0') {
439 match self.peek2() {
440 Some(b'x') | Some(b'X') => return self.scan_radix_int(start, 16),
441 Some(b'b') | Some(b'B') => return self.scan_radix_int(start, 2),
442 _ => {}
443 }
444 }
445 self.scan_decimal(start)
446 }
447
448 fn scan_radix_int(&mut self, start: usize, radix: u32) -> Result<TokenKind, QalaError> {
454 self.pos += 2; let body_start = self.pos;
456 let is_digit = |b: u8| match radix {
457 16 => b.is_ascii_hexdigit(),
458 2 => b == b'0' || b == b'1',
459 _ => false,
460 };
461 let mut last_was_underscore = false;
463 let mut saw_digit = false;
464 let mut malformed = false;
465 while let Some(b) = self.peek() {
466 if is_digit(b) {
467 last_was_underscore = false;
468 saw_digit = true;
469 self.pos += 1;
470 } else if b == b'_' {
471 if !saw_digit || last_was_underscore {
474 malformed = true;
475 }
476 last_was_underscore = true;
477 self.pos += 1;
478 } else {
479 break;
480 }
481 }
482 if self
487 .peek()
488 .is_some_and(|b| b == b'_' || b.is_ascii_alphanumeric())
489 {
490 self.consume_alnum_tail();
491 malformed = true;
492 }
493 if malformed || !saw_digit || last_was_underscore {
494 return Err(self.malformed_number(start));
495 }
496 let digits: String = self.src[body_start..self.pos]
497 .chars()
498 .filter(|&c| c != '_')
499 .collect();
500 match i64::from_str_radix(&digits, radix) {
501 Ok(value) => Ok(TokenKind::Int(value)),
502 Err(e) => Err(self.int_parse_error(start, e.kind())),
503 }
504 }
505
506 fn scan_decimal(&mut self, start: usize) -> Result<TokenKind, QalaError> {
513 let mut is_float = false;
514
515 self.scan_digit_run(start)?;
519
520 if self.peek() == Some(b'.') {
526 match self.peek2() {
527 Some(b) if b.is_ascii_digit() => {
528 is_float = true;
529 self.pos += 1; self.scan_digit_run(start)?;
531 }
532 Some(b'_') => {
533 self.pos += 1; self.consume_alnum_tail();
535 return Err(self.malformed_number(start));
536 }
537 _ => {}
538 }
539 }
540
541 if matches!(self.peek(), Some(b'e') | Some(b'E')) {
544 is_float = true;
545 self.pos += 1; if matches!(self.peek(), Some(b'+') | Some(b'-')) {
547 self.pos += 1;
548 }
549 if !self.peek().is_some_and(|b| b.is_ascii_digit()) {
550 self.consume_alnum_tail();
553 return Err(self.malformed_number(start));
554 }
555 self.scan_digit_run(start)?;
556 }
557
558 if self
561 .peek()
562 .is_some_and(|b| b == b'_' || b.is_ascii_alphanumeric())
563 {
564 self.consume_alnum_tail();
565 return Err(self.malformed_number(start));
566 }
567
568 let text: String = self.src[start..self.pos]
569 .chars()
570 .filter(|&c| c != '_')
571 .collect();
572 if is_float {
573 match text.parse::<f64>() {
574 Ok(value) => Ok(TokenKind::Float(value)),
575 Err(_) => Err(self.malformed_number(start)),
579 }
580 } else {
581 match text.parse::<i64>() {
582 Ok(value) => Ok(TokenKind::Int(value)),
583 Err(e) => Err(self.int_parse_error(start, e.kind())),
584 }
585 }
586 }
587
588 fn scan_digit_run(&mut self, literal_start: usize) -> Result<(), QalaError> {
593 let mut last_was_underscore = false;
594 let mut saw_digit = false;
595 if self.peek() == Some(b'_') {
597 self.consume_alnum_tail();
598 return Err(self.malformed_number(literal_start));
599 }
600 while let Some(b) = self.peek() {
601 if b.is_ascii_digit() {
602 last_was_underscore = false;
603 saw_digit = true;
604 self.pos += 1;
605 } else if b == b'_' {
606 if last_was_underscore {
607 self.consume_alnum_tail();
609 return Err(self.malformed_number(literal_start));
610 }
611 last_was_underscore = true;
612 self.pos += 1;
613 } else {
614 break;
615 }
616 }
617 if last_was_underscore || !saw_digit {
620 self.consume_alnum_tail();
621 return Err(self.malformed_number(literal_start));
622 }
623 Ok(())
624 }
625
626 fn consume_alnum_tail(&mut self) {
631 while let Some(b) = self.peek() {
632 if b == b'_' || b.is_ascii_alphanumeric() || b == b'.' {
633 self.pos += 1;
634 } else {
635 break;
636 }
637 }
638 }
639
640 fn malformed_number(&self, literal_start: usize) -> QalaError {
643 let text = &self.src[literal_start..self.pos];
644 QalaError::MalformedNumber {
645 span: Span::new(literal_start, self.pos - literal_start),
646 message: format!("`{text}`"),
647 }
648 }
649
650 fn int_parse_error(&self, literal_start: usize, kind: &IntErrorKind) -> QalaError {
654 match kind {
655 IntErrorKind::PosOverflow | IntErrorKind::NegOverflow => QalaError::IntOverflow {
656 span: Span::new(literal_start, self.pos - literal_start),
657 },
658 _ => self.malformed_number(literal_start),
659 }
660 }
661
662 fn scan_byte_literal(&mut self, start: usize) -> Result<TokenKind, QalaError> {
671 self.pos += 2; let value: u8 = match self.peek() {
673 Some(b'\'') => {
675 self.pos += 1;
676 return Err(
677 self.bad_byte_literal(start, "expected one character between `b'` and `'`")
678 );
679 }
680 Some(b'\\') => {
682 self.pos += 1;
683 match self.bump() {
684 Some(b'n') => b'\n',
685 Some(b't') => b'\t',
686 Some(b'r') => b'\r',
687 Some(b'0') => 0,
688 Some(b'\\') => b'\\',
689 Some(b'\'') => b'\'',
690 _ => {
691 if self.peek() == Some(b'\'') {
693 self.pos += 1;
694 }
695 return Err(self.bad_byte_literal(start, "unknown escape in byte literal"));
696 }
697 }
698 }
699 Some(b) if b >= 0x80 => {
701 let ch = self.src[self.pos..].chars().next().unwrap_or('\u{FFFD}');
702 self.pos += ch.len_utf8();
703 if self.peek() == Some(b'\'') {
705 self.pos += 1;
706 }
707 return Err(
708 self.bad_byte_literal(start, "byte literal must be a single ASCII character")
709 );
710 }
711 Some(b'\n') | None => {
713 return Err(self.bad_byte_literal(start, "unterminated byte literal"));
714 }
715 Some(b) => {
717 self.pos += 1;
718 b
719 }
720 };
721 match self.peek() {
725 Some(b'\'') => {
726 self.pos += 1;
727 Ok(TokenKind::Byte(value))
728 }
729 _ => {
730 while let Some(b) = self.peek() {
731 if b == b'\'' {
732 self.pos += 1;
733 break;
734 }
735 if b == b'\n' {
736 break;
737 }
738 self.pos += 1;
739 }
740 Err(self.bad_byte_literal(start, "byte literal must be exactly one character"))
741 }
742 }
743 }
744
745 fn bad_byte_literal(&self, literal_start: usize, message: &str) -> QalaError {
748 QalaError::BadByteLiteral {
749 span: Span::new(literal_start, self.pos - literal_start),
750 message: message.to_string(),
751 }
752 }
753
754 fn scan_string_body(&mut self, out: &mut Vec<Token>) -> Result<(), QalaError> {
779 let (open_quote, saw_interp) = match self.mode_stack.last() {
784 Some(Mode::StrText {
785 open_quote,
786 saw_interp,
787 }) => (*open_quote, *saw_interp),
788 _ => return Ok(()), };
790 let text_start = self.pos;
791 let mut buf = String::new();
792 loop {
793 match self.peek() {
794 Some(b'"') => {
796 self.pos += 1; self.mode_stack.pop(); if saw_interp {
799 let span = Span::new(text_start, self.pos - text_start);
802 out.push(Token::new(TokenKind::StrEnd(buf), span));
803 } else {
804 let span = Span::new(open_quote, self.pos - open_quote);
806 out.push(Token::new(TokenKind::Str(buf), span));
807 }
808 return Ok(());
809 }
810 Some(b'{') => {
812 let brace_pos = self.pos;
813 if saw_interp {
815 let span = Span::new(text_start, brace_pos - text_start);
818 out.push(Token::new(TokenKind::StrMid(buf), span));
819 } else {
820 let span = Span::new(open_quote, brace_pos - open_quote);
823 out.push(Token::new(TokenKind::StrStart(buf), span));
824 }
825 if let Some(Mode::StrText { saw_interp, .. }) = self.mode_stack.last_mut() {
828 *saw_interp = true;
829 }
830 self.pos += 1; out.push(Token::new(TokenKind::InterpStart, Span::new(brace_pos, 1)));
833 self.mode_stack.push(Mode::Interp {
834 brace_depth: 0,
835 open_brace: brace_pos,
836 });
837 return Ok(());
838 }
839 Some(b'\\') => {
841 let backslash_pos = self.pos;
842 self.pos += 1; match self.bump() {
844 Some(b'n') => buf.push('\n'),
845 Some(b't') => buf.push('\t'),
846 Some(b'r') => buf.push('\r'),
847 Some(b'0') => buf.push('\0'),
848 Some(b'\\') => buf.push('\\'),
849 Some(b'"') => buf.push('"'),
850 Some(b'{') => buf.push('{'),
851 Some(b'}') => buf.push('}'),
852 Some(b'u') => {
853 let ch = self.scan_unicode_escape(backslash_pos)?;
854 buf.push(ch);
855 }
856 _ => {
857 return Err(QalaError::InvalidEscape {
858 span: Span::new(backslash_pos, 1),
859 message: "unknown escape sequence".to_string(),
860 });
861 }
862 }
863 }
864 Some(b'\n') | None => {
866 return Err(QalaError::UnterminatedString {
867 span: Span::new(open_quote, 1),
868 });
869 }
870 Some(_) => {
875 let ch = self.src[self.pos..].chars().next().unwrap_or('\u{FFFD}');
876 self.pos += ch.len_utf8();
877 buf.push(ch);
878 }
879 }
880 }
881 }
882
883 fn scan_unicode_escape(&mut self, backslash_pos: usize) -> Result<char, QalaError> {
890 let bad = |message: &str| QalaError::InvalidEscape {
891 span: Span::new(backslash_pos, 1),
892 message: message.to_string(),
893 };
894 if self.peek() != Some(b'{') {
895 return Err(bad("expected `{` after `\\u`"));
896 }
897 self.pos += 1; let hex_start = self.pos;
899 let mut count = 0usize;
900 while let Some(b) = self.peek() {
901 if b.is_ascii_hexdigit() && count < 6 {
902 count += 1;
903 self.pos += 1;
904 } else {
905 break;
906 }
907 }
908 if count == 0 {
909 return Err(bad("expected 1 to 6 hex digits in `\\u{...}`"));
910 }
911 if self.peek() != Some(b'}') {
912 return Err(bad("expected `}` to close `\\u{...}`"));
913 }
914 let hex = &self.src[hex_start..self.pos];
915 self.pos += 1; let code = u32::from_str_radix(hex, 16).map_err(|_| bad("invalid `\\u{...}` codepoint"))?;
919 char::from_u32(code).ok_or_else(|| bad("invalid Unicode codepoint in `\\u{...}`"))
920 }
921
922 fn unterminated_at_eof(&self) -> Option<QalaError> {
930 match self.mode_stack.last() {
931 Some(Mode::StrText { open_quote, .. }) => Some(QalaError::UnterminatedString {
932 span: Span::new(*open_quote, 1),
933 }),
934 Some(Mode::Interp { open_brace, .. }) => Some(QalaError::UnterminatedInterpolation {
935 span: Span::new(*open_brace, 1),
936 }),
937 _ => None,
938 }
939 }
940}
941
942#[cfg(test)]
943mod tests {
944 use super::*;
945
946 fn kinds(src: &str) -> Vec<TokenKind> {
949 Lexer::tokenize(src)
950 .expect("expected successful tokenize")
951 .into_iter()
952 .map(|t| t.kind)
953 .collect()
954 }
955
956 fn err(src: &str) -> QalaError {
958 Lexer::tokenize(src).expect_err("expected a lex error")
959 }
960
961 #[test]
962 fn empty_source_is_just_eof_at_offset_zero() {
963 let toks = Lexer::tokenize("").unwrap();
964 assert_eq!(toks.len(), 1);
965 assert_eq!(toks[0].kind, TokenKind::Eof);
966 assert_eq!(toks[0].span, Span::new(0, 0));
967 }
968
969 #[test]
970 fn whitespace_only_source_is_just_eof() {
971 assert_eq!(kinds(" \n\t \r\n"), vec![TokenKind::Eof]);
972 }
973
974 #[test]
975 fn comment_only_source_is_just_eof() {
976 assert_eq!(kinds("// hi\n \n"), vec![TokenKind::Eof]);
977 assert_eq!(kinds("// just a comment, no newline"), vec![TokenKind::Eof]);
978 assert_eq!(kinds("//\n"), vec![TokenKind::Eof]);
980 }
981
982 #[test]
983 fn whitespace_and_comments_mixed_is_just_eof() {
984 assert_eq!(kinds(" \n\t // hi\n // more\n "), vec![TokenKind::Eof]);
985 }
986
987 #[test]
988 fn leading_bom_is_skipped_silently() {
989 let toks = kinds("\u{FEFF}fn main(){}");
990 assert_eq!(
991 toks,
992 vec![
993 TokenKind::Fn,
994 TokenKind::Ident("main".to_string()),
995 TokenKind::LParen,
996 TokenKind::RParen,
997 TokenKind::LBrace,
998 TokenKind::RBrace,
999 TokenKind::Eof,
1000 ]
1001 );
1002 assert_eq!(kinds("\u{FEFF}"), vec![TokenKind::Eof]);
1004 }
1005
1006 #[test]
1007 fn identifiers_lex_with_their_text() {
1008 for name in ["_x", "x1", "__", "fooBar", "_", "main", "abc123", "X"] {
1009 assert_eq!(
1010 kinds(name),
1011 vec![TokenKind::Ident(name.to_string()), TokenKind::Eof],
1012 "identifier {name:?}"
1013 );
1014 }
1015 }
1016
1017 #[test]
1018 fn reserved_words_lex_to_keyword_kinds_not_idents() {
1019 let cases: &[(&str, TokenKind)] = &[
1020 ("fn", TokenKind::Fn),
1021 ("let", TokenKind::Let),
1022 ("mut", TokenKind::Mut),
1023 ("if", TokenKind::If),
1024 ("else", TokenKind::Else),
1025 ("while", TokenKind::While),
1026 ("for", TokenKind::For),
1027 ("in", TokenKind::In),
1028 ("return", TokenKind::Return),
1029 ("break", TokenKind::Break),
1030 ("continue", TokenKind::Continue),
1031 ("defer", TokenKind::Defer),
1032 ("match", TokenKind::Match),
1033 ("struct", TokenKind::Struct),
1034 ("enum", TokenKind::Enum),
1035 ("interface", TokenKind::Interface),
1036 ("comptime", TokenKind::Comptime),
1037 ("is", TokenKind::Is),
1038 ("pure", TokenKind::Pure),
1039 ("io", TokenKind::Io),
1040 ("alloc", TokenKind::Alloc),
1041 ("panic", TokenKind::Panic),
1042 ("or", TokenKind::Or),
1043 ("self", TokenKind::SelfKw),
1044 ];
1045 for (src, kind) in cases {
1046 assert_eq!(
1047 kinds(src),
1048 vec![kind.clone(), TokenKind::Eof],
1049 "keyword {src:?}"
1050 );
1051 }
1052 }
1053
1054 #[test]
1055 fn true_and_false_are_boolean_keyword_kinds() {
1056 assert_eq!(kinds("true"), vec![TokenKind::True, TokenKind::Eof]);
1057 assert_eq!(kinds("false"), vec![TokenKind::False, TokenKind::Eof]);
1058 }
1059
1060 #[test]
1061 fn primitive_type_names_are_keyword_kinds() {
1062 assert_eq!(kinds("i64"), vec![TokenKind::I64Ty, TokenKind::Eof]);
1063 assert_eq!(kinds("f64"), vec![TokenKind::F64Ty, TokenKind::Eof]);
1064 assert_eq!(kinds("bool"), vec![TokenKind::BoolTy, TokenKind::Eof]);
1065 assert_eq!(kinds("str"), vec![TokenKind::StrTy, TokenKind::Eof]);
1066 assert_eq!(kinds("byte"), vec![TokenKind::ByteTy, TokenKind::Eof]);
1067 assert_eq!(kinds("void"), vec![TokenKind::VoidTy, TokenKind::Eof]);
1068 }
1069
1070 #[test]
1071 fn stdlib_and_result_family_names_lex_as_identifiers() {
1072 for name in [
1073 "Result", "Option", "Ok", "Err", "Some", "None", "println", "open", "map", "filter",
1074 "reduce", "print", "len", "push", "pop", "sqrt", "abs", "assert",
1075 ] {
1076 assert_eq!(
1077 kinds(name),
1078 vec![TokenKind::Ident(name.to_string()), TokenKind::Eof],
1079 "{name:?} should be an identifier"
1080 );
1081 }
1082 }
1083
1084 #[test]
1085 fn operators_and_punctuation_lex_one_per_kind() {
1086 let cases: &[(&str, TokenKind)] = &[
1087 ("+", TokenKind::Plus),
1088 ("-", TokenKind::Minus),
1089 ("*", TokenKind::Star),
1090 ("/", TokenKind::Slash),
1091 ("%", TokenKind::Percent),
1092 ("<", TokenKind::Lt),
1093 (">", TokenKind::Gt),
1094 ("!", TokenKind::Bang),
1095 ("=", TokenKind::Eq),
1096 (".", TokenKind::Dot),
1097 (",", TokenKind::Comma),
1098 (":", TokenKind::Colon),
1099 (";", TokenKind::Semi),
1100 ("(", TokenKind::LParen),
1101 (")", TokenKind::RParen),
1102 ("[", TokenKind::LBracket),
1103 ("]", TokenKind::RBracket),
1104 ("{", TokenKind::LBrace),
1105 ("}", TokenKind::RBrace),
1106 ("?", TokenKind::Question),
1107 ];
1108 for (src, kind) in cases {
1109 assert_eq!(
1110 kinds(src),
1111 vec![kind.clone(), TokenKind::Eof],
1112 "operator {src:?}"
1113 );
1114 }
1115 }
1116
1117 #[test]
1118 fn maximal_munch_pairs_and_triples() {
1119 let cases: &[(&str, TokenKind)] = &[
1120 ("->", TokenKind::Arrow),
1121 ("=>", TokenKind::FatArrow),
1122 ("|>", TokenKind::PipeGt),
1123 ("..", TokenKind::DotDot),
1124 ("..=", TokenKind::DotDotEq),
1125 ("==", TokenKind::EqEq),
1126 ("!=", TokenKind::BangEq),
1127 ("<=", TokenKind::LtEq),
1128 (">=", TokenKind::GtEq),
1129 ("&&", TokenKind::AmpAmp),
1130 ("||", TokenKind::PipePipe),
1131 ];
1132 for (src, kind) in cases {
1133 assert_eq!(
1134 kinds(src),
1135 vec![kind.clone(), TokenKind::Eof],
1136 "operator {src:?}"
1137 );
1138 }
1139 assert_eq!(
1141 kinds("a -> b"),
1142 vec![
1143 TokenKind::Ident("a".to_string()),
1144 TokenKind::Arrow,
1145 TokenKind::Ident("b".to_string()),
1146 TokenKind::Eof,
1147 ]
1148 );
1149 assert_eq!(
1151 kinds("a => b"),
1152 vec![
1153 TokenKind::Ident("a".to_string()),
1154 TokenKind::FatArrow,
1155 TokenKind::Ident("b".to_string()),
1156 TokenKind::Eof,
1157 ]
1158 );
1159 }
1160
1161 #[test]
1162 fn line_comments_are_skipped_but_slash_is_division() {
1163 assert_eq!(
1165 kinds("a // comment\nb"),
1166 vec![
1167 TokenKind::Ident("a".to_string()),
1168 TokenKind::Ident("b".to_string()),
1169 TokenKind::Eof,
1170 ]
1171 );
1172 assert_eq!(
1174 kinds("/x"),
1175 vec![
1176 TokenKind::Slash,
1177 TokenKind::Ident("x".to_string()),
1178 TokenKind::Eof
1179 ]
1180 );
1181 assert_eq!(
1182 kinds("a / b"),
1183 vec![
1184 TokenKind::Ident("a".to_string()),
1185 TokenKind::Slash,
1186 TokenKind::Ident("b".to_string()),
1187 TokenKind::Eof,
1188 ]
1189 );
1190 }
1191
1192 #[test]
1193 fn a_lone_ampersand_is_unexpected_char() {
1194 match err("a & b") {
1195 QalaError::UnexpectedChar { span, ch } => {
1196 assert_eq!(ch, '&');
1197 assert_eq!(span, Span::new(2, 1));
1198 }
1199 other => panic!("expected UnexpectedChar, got {other:?}"),
1200 }
1201 assert!(matches!(
1203 err("&"),
1204 QalaError::UnexpectedChar { ch: '&', .. }
1205 ));
1206 }
1207
1208 #[test]
1209 fn a_lone_pipe_is_unexpected_char() {
1210 match err("a | b") {
1211 QalaError::UnexpectedChar { span, ch } => {
1212 assert_eq!(ch, '|');
1213 assert_eq!(span, Span::new(2, 1));
1214 }
1215 other => panic!("expected UnexpectedChar, got {other:?}"),
1216 }
1217 assert!(matches!(
1219 err("|"),
1220 QalaError::UnexpectedChar { ch: '|', .. }
1221 ));
1222 assert!(matches!(
1223 err("|x"),
1224 QalaError::UnexpectedChar { ch: '|', .. }
1225 ));
1226 }
1227
1228 #[test]
1229 fn a_non_ascii_byte_in_identifier_position_is_unexpected_char() {
1230 match err("let café = 1") {
1231 QalaError::UnexpectedChar { span, ch } => {
1232 assert_eq!(ch, 'é');
1233 let e_start = "let caf".len();
1235 assert_eq!(span, Span::new(e_start, 'é'.len_utf8()));
1236 assert_eq!(span.slice("let café = 1"), "é");
1237 }
1238 other => panic!("expected UnexpectedChar, got {other:?}"),
1239 }
1240 assert_eq!(kinds("// café au lait\n"), vec![TokenKind::Eof]);
1242 }
1243
1244 #[test]
1245 fn representative_token_spans_are_exact() {
1246 let src = "fn main()";
1248 let toks = Lexer::tokenize(src).unwrap();
1249 let main_tok = &toks[1];
1250 assert_eq!(main_tok.kind, TokenKind::Ident("main".to_string()));
1251 assert_eq!(main_tok.span.start, 3);
1252 assert_eq!(main_tok.span.len, 4);
1253 assert_eq!(main_tok.span.slice(src), "main");
1254 assert_eq!(toks[0].span, Span::new(0, 2));
1256 assert_eq!(toks[0].span.slice(src), "fn");
1257 let eof = toks.last().unwrap();
1259 assert_eq!(eof.kind, TokenKind::Eof);
1260 assert_eq!(eof.span, Span::new(src.len(), 0));
1261 }
1262
1263 #[test]
1264 fn span_after_trivia_starts_at_the_token_not_the_whitespace() {
1265 let src = " + ";
1267 let toks = Lexer::tokenize(src).unwrap();
1268 assert_eq!(toks[0].kind, TokenKind::Plus);
1269 assert_eq!(toks[0].span, Span::new(2, 1));
1270 }
1271
1272 fn one(src: &str) -> TokenKind {
1276 let mut k = kinds(src);
1277 assert_eq!(
1278 k.last(),
1279 Some(&TokenKind::Eof),
1280 "stream should end in Eof: {src:?}"
1281 );
1282 k.pop();
1283 assert_eq!(
1284 k.len(),
1285 1,
1286 "expected exactly one token in {src:?}, got {k:?}"
1287 );
1288 k.pop().unwrap()
1289 }
1290
1291 #[test]
1292 fn decimal_integers() {
1293 assert_eq!(one("0"), TokenKind::Int(0));
1294 assert_eq!(one("42"), TokenKind::Int(42));
1295 assert_eq!(one("1_000_000"), TokenKind::Int(1_000_000));
1296 assert_eq!(one("9223372036854775807"), TokenKind::Int(i64::MAX));
1297 }
1298
1299 #[test]
1300 fn hex_integers() {
1301 assert_eq!(one("0xFF"), TokenKind::Int(255));
1302 assert_eq!(one("0xFF_FF"), TokenKind::Int(65_535));
1303 assert_eq!(one("0X1a"), TokenKind::Int(26));
1304 assert_eq!(one("0x0"), TokenKind::Int(0));
1305 }
1306
1307 #[test]
1308 fn binary_integers() {
1309 assert_eq!(one("0b1010"), TokenKind::Int(10));
1310 assert_eq!(one("0b1010_0101"), TokenKind::Int(165));
1311 assert_eq!(one("0B1"), TokenKind::Int(1));
1312 }
1313
1314 #[test]
1315 fn integer_overflow_errors_at_the_digits() {
1316 match err("9223372036854775808") {
1318 QalaError::IntOverflow { span } => {
1319 assert_eq!(span, Span::new(0, 19), "span should cover the 19 digits");
1320 }
1321 other => panic!("expected IntOverflow, got {other:?}"),
1322 }
1323 match err("0x8000000000000000") {
1324 QalaError::IntOverflow { span } => assert_eq!(span, Span::new(0, 18)),
1325 other => panic!("expected IntOverflow, got {other:?}"),
1326 }
1327 let big = "1".repeat(50);
1329 assert!(matches!(err(&big), QalaError::IntOverflow { .. }));
1330 match err("let x = 99999999999999999999\n") {
1332 QalaError::IntOverflow { span } => {
1333 assert_eq!(
1334 span.slice("let x = 99999999999999999999\n"),
1335 "99999999999999999999"
1336 );
1337 }
1338 other => panic!("expected IntOverflow, got {other:?}"),
1339 }
1340 }
1341
1342 #[test]
1343 fn malformed_numbers_span_the_literal() {
1344 for src in [
1345 "1_", "1__0", "0x", "0xG", "0b2", "1_.0", "1._5", "1e_5", "1e", "1e+", "1e-", "0b",
1346 "0x_FF", "0b_1", "0xFF_",
1347 ] {
1348 match Lexer::tokenize(src) {
1349 Err(QalaError::MalformedNumber { span, .. }) => {
1350 assert_eq!(
1351 span.slice(src),
1352 src,
1353 "MalformedNumber span should cover the whole literal {src:?}"
1354 );
1355 }
1356 other => panic!("expected MalformedNumber for {src:?}, got {other:?}"),
1357 }
1358 }
1359 match err("a = 1__0;") {
1361 QalaError::MalformedNumber { span, .. } => assert_eq!(span.slice("a = 1__0;"), "1__0"),
1362 other => panic!("expected MalformedNumber, got {other:?}"),
1363 }
1364 }
1365
1366 #[test]
1367 fn floats_including_exponents() {
1368 assert_eq!(one("1.0"), TokenKind::Float(1.0));
1369 assert_eq!(one("1.5e10"), TokenKind::Float(1.5e10));
1370 assert_eq!(one("1e10"), TokenKind::Float(1e10));
1371 assert_eq!(one("2.0e-3"), TokenKind::Float(2.0e-3));
1372 assert_eq!(one("1.5E+2"), TokenKind::Float(150.0));
1373 assert_eq!(one("7.25"), TokenKind::Float(7.25));
1374 assert_eq!(one("0.1"), TokenKind::Float(0.1));
1376 assert_eq!(one("1_000.000_5"), TokenKind::Float(1000.0005));
1378 }
1379
1380 #[test]
1381 fn the_leading_dot_rule_keeps_dot_and_ranges_unambiguous() {
1382 assert_eq!(
1384 kinds(".5"),
1385 vec![TokenKind::Dot, TokenKind::Int(5), TokenKind::Eof]
1386 );
1387 assert_eq!(
1388 kinds("0..5"),
1389 vec![
1390 TokenKind::Int(0),
1391 TokenKind::DotDot,
1392 TokenKind::Int(5),
1393 TokenKind::Eof
1394 ]
1395 );
1396 assert_eq!(
1397 kinds("0..=5"),
1398 vec![
1399 TokenKind::Int(0),
1400 TokenKind::DotDotEq,
1401 TokenKind::Int(5),
1402 TokenKind::Eof
1403 ]
1404 );
1405 assert_eq!(
1406 kinds("x.0"),
1407 vec![
1408 TokenKind::Ident("x".to_string()),
1409 TokenKind::Dot,
1410 TokenKind::Int(0),
1411 TokenKind::Eof,
1412 ]
1413 );
1414 assert_eq!(
1416 kinds("0..15"),
1417 vec![
1418 TokenKind::Int(0),
1419 TokenKind::DotDot,
1420 TokenKind::Int(15),
1421 TokenKind::Eof
1422 ]
1423 );
1424 assert_eq!(one("1.0"), TokenKind::Float(1.0));
1426 assert_eq!(
1428 kinds("1..2"),
1429 vec![
1430 TokenKind::Int(1),
1431 TokenKind::DotDot,
1432 TokenKind::Int(2),
1433 TokenKind::Eof
1434 ]
1435 );
1436 }
1437
1438 #[test]
1439 fn byte_literals() {
1440 assert_eq!(one("b'A'"), TokenKind::Byte(65));
1441 assert_eq!(one("b'\\n'"), TokenKind::Byte(10));
1442 assert_eq!(one("b'\\t'"), TokenKind::Byte(9));
1443 assert_eq!(one("b'\\r'"), TokenKind::Byte(13));
1444 assert_eq!(one("b'\\\\'"), TokenKind::Byte(92));
1445 assert_eq!(one("b'\\''"), TokenKind::Byte(39));
1446 assert_eq!(one("b'\\0'"), TokenKind::Byte(0));
1447 assert_eq!(one("b' '"), TokenKind::Byte(32));
1448 assert_eq!(one("b'z'"), TokenKind::Byte(122));
1449 }
1450
1451 #[test]
1452 fn bad_byte_literals() {
1453 for src in ["b''", "b'ab'", "b'\\x'", "b'é'"] {
1454 match Lexer::tokenize(src) {
1455 Err(QalaError::BadByteLiteral { span, .. }) => {
1456 assert_eq!(span.start, 0, "span should start at `b` for {src:?}");
1458 assert!(span.len >= 3, "span should cover the literal for {src:?}");
1459 }
1460 other => panic!("expected BadByteLiteral for {src:?}, got {other:?}"),
1461 }
1462 }
1463 }
1464
1465 #[test]
1466 fn b_not_followed_by_quote_is_the_identifier_b() {
1467 assert_eq!(
1468 kinds("by"),
1469 vec![TokenKind::Ident("by".to_string()), TokenKind::Eof]
1470 );
1471 assert_eq!(
1472 kinds("b 1"),
1473 vec![
1474 TokenKind::Ident("b".to_string()),
1475 TokenKind::Int(1),
1476 TokenKind::Eof
1477 ]
1478 );
1479 assert_eq!(
1480 kinds("b"),
1481 vec![TokenKind::Ident("b".to_string()), TokenKind::Eof]
1482 );
1483 assert_eq!(kinds("byte"), vec![TokenKind::ByteTy, TokenKind::Eof]);
1485 }
1486
1487 #[test]
1488 fn numbers_in_a_realistic_snippet() {
1489 let toks = kinds("for i in 0..15 { x = 1_000 + 0xFF }");
1491 assert_eq!(
1492 toks,
1493 vec![
1494 TokenKind::For,
1495 TokenKind::Ident("i".to_string()),
1496 TokenKind::In,
1497 TokenKind::Int(0),
1498 TokenKind::DotDot,
1499 TokenKind::Int(15),
1500 TokenKind::LBrace,
1501 TokenKind::Ident("x".to_string()),
1502 TokenKind::Eq,
1503 TokenKind::Int(1_000),
1504 TokenKind::Plus,
1505 TokenKind::Int(255),
1506 TokenKind::RBrace,
1507 TokenKind::Eof,
1508 ]
1509 );
1510 }
1511
1512 #[test]
1513 fn numeric_literal_spans_are_exact() {
1514 let src = "x = 42";
1516 let toks = Lexer::tokenize(src).unwrap();
1517 let lit = &toks[2];
1518 assert_eq!(lit.kind, TokenKind::Int(42));
1519 assert_eq!(lit.span, Span::new(4, 2));
1520 assert_eq!(lit.span.slice(src), "42");
1521 let src2 = "y = 1.5e10;";
1523 let toks2 = Lexer::tokenize(src2).unwrap();
1524 let f = &toks2[2];
1525 assert_eq!(f.kind, TokenKind::Float(1.5e10));
1526 assert_eq!(f.span.slice(src2), "1.5e10");
1527 }
1528
1529 fn s(text: &str) -> TokenKind {
1537 TokenKind::Str(text.to_string())
1538 }
1539 fn id(text: &str) -> TokenKind {
1540 TokenKind::Ident(text.to_string())
1541 }
1542
/// Strings without any interpolation lex to a single `Str` token holding the
/// text between the quotes; when embedded in a statement, the token's span
/// covers the literal including both quotes.
#[test]
fn interpolation_free_strings() {
    // (quoted source, expected payload) pairs.
    let cases = [
        ("\"abc\"", "abc"),
        ("\"\"", ""),
        ("\"hello, world\"", "hello, world"),
        ("\"café\"", "café"),
    ];
    for (src, payload) in cases {
        assert_eq!(one(src), s(payload));
    }

    // Inside a statement the string is the third token.
    let statement = r#"x = "hi""#;
    let tokens = Lexer::tokenize(statement).unwrap();
    let literal = &tokens[2];
    assert_eq!(literal.kind, s("hi"));
    assert_eq!(literal.span.slice(statement), r#""hi""#);
}
1556
/// Escape sequences inside a string literal are decoded, so the `Str`
/// payload holds the resulting characters rather than the escape text.
/// Escaped braces (`\{`, `\}`) yield literal braces and do not open an
/// interpolation.
#[test]
fn escapes_are_decoded_into_the_payload() {
    // (source literal, decoded payload) pairs, checked in order.
    let cases = [
        ("\"a\\nb\"", "a\nb"),
        ("\"\\t\"", "\t"),
        ("\"\\r\"", "\r"),
        ("\"\\0\"", "\0"),
        ("\"\\\\\"", "\\"),
        ("\"\\\"\"", "\""),
        ("\"\\{\"", "{"),
        ("\"\\}\"", "}"),
        ("\"\\u{41}\"", "A"),
        ("\"\\u{1F600}\"", "\u{1F600}"),
        ("\"line1\\nline2 \\u{2764}\"", "line1\nline2 \u{2764}"),
        ("\"a \\{not interp\\} b\"", "a {not interp} b"),
    ];

    for (src, decoded) in cases {
        assert_eq!(one(src), s(decoded));
    }
}
1577
/// Invalid escapes are reported as `InvalidEscape`, with the span pointing
/// at the single backslash byte; several malformed `\u` forms must produce
/// the same error kind.
#[test]
fn bad_escapes_error_at_the_backslash() {
    // Lexes `src`, insists on an `InvalidEscape` error and hands back its span.
    let escape_span = |src: &str| match err(src) {
        QalaError::InvalidEscape { span, .. } => span,
        other => panic!("expected InvalidEscape, got {other:?}"),
    };

    assert_eq!(
        escape_span("\"a\\qb\""),
        Span::new(2, 1),
        "span should be the backslash byte"
    );

    // Empty, out-of-range, surrogate, unclosed and brace-less `\u` escapes.
    let malformed_unicode = [
        "\"\\u{}\"",
        "\"\\u{110000}\"",
        "\"\\u{D800}\"",
        "\"\\u{41\"",
        "\"\\uABCD\"",
    ];
    for src in malformed_unicode {
        let outcome = Lexer::tokenize(src);
        assert!(
            matches!(outcome, Err(QalaError::InvalidEscape { .. })),
            "expected InvalidEscape for {src:?}"
        );
    }

    // The span still selects just the backslash when the escape sits mid-string.
    let sentence = "\"hello \\x world\"";
    let span = escape_span(sentence);
    assert_eq!(span.slice(sentence), "\\");
}
1608
/// A string that is never closed is reported as `UnterminatedString`, and
/// the span is the one-byte opening quote. This holds at end of input, when
/// a raw newline appears inside the literal, and when the string follows
/// other tokens.
#[test]
fn unterminated_string_errors_at_the_opening_quote() {
    // Lexes `src`, insists on `UnterminatedString` and hands back its span.
    let opening_quote_span = |src: &str| match err(src) {
        QalaError::UnterminatedString { span } => span,
        other => panic!("expected UnterminatedString, got {other:?}"),
    };

    assert_eq!(
        opening_quote_span("\"abc"),
        Span::new(0, 1),
        "span should be the opening quote"
    );
    assert_eq!(opening_quote_span("\"abc\ndef\""), Span::new(0, 1));

    let statement = "let x = \"oops";
    let span = opening_quote_span(statement);
    assert_eq!(span.slice(statement), "\"");

    // A lone quote is the smallest unterminated string.
    assert!(matches!(err("\""), QalaError::UnterminatedString { .. }));
}
1633
/// A string with one `{...}` hole lexes to `StrStart`, `InterpStart`, the
/// tokens of the embedded expression, `InterpEnd` and `StrEnd` — both on its
/// own and as a call argument.
#[test]
fn simple_interpolation() {
    let start = |text: &str| TokenKind::StrStart(text.to_owned());
    let end = |text: &str| TokenKind::StrEnd(text.to_owned());

    // The interpolated string by itself.
    let bare = kinds("\"hi {name}!\"");
    let bare_expected = vec![
        start("hi "),
        TokenKind::InterpStart,
        id("name"),
        TokenKind::InterpEnd,
        end("!"),
        TokenKind::Eof,
    ];
    assert_eq!(bare, bare_expected);

    // The same shape nested inside a call expression.
    let call = kinds("println(\"hello, {name}!\")");
    let call_expected = vec![
        id("println"),
        TokenKind::LParen,
        start("hello, "),
        TokenKind::InterpStart,
        id("name"),
        TokenKind::InterpEnd,
        end("!"),
        TokenKind::RParen,
        TokenKind::Eof,
    ];
    assert_eq!(call, call_expected);
}
1664
/// Strings with two holes produce a `StrMid` fragment between them, and
/// text fragments are emitted even when they are empty.
#[test]
fn multiple_and_empty_fragment_interpolations() {
    let frag_start = |text: &str| TokenKind::StrStart(String::from(text));
    let frag_mid = |text: &str| TokenKind::StrMid(String::from(text));
    let frag_end = |text: &str| TokenKind::StrEnd(String::from(text));

    // (source, expected token kinds) pairs, checked in order.
    let table = [
        (
            "\"{a}{b}\"",
            vec![
                frag_start(""),
                TokenKind::InterpStart,
                id("a"),
                TokenKind::InterpEnd,
                frag_mid(""),
                TokenKind::InterpStart,
                id("b"),
                TokenKind::InterpEnd,
                frag_end(""),
                TokenKind::Eof,
            ],
        ),
        (
            "\"a{x}b{y}c\"",
            vec![
                frag_start("a"),
                TokenKind::InterpStart,
                id("x"),
                TokenKind::InterpEnd,
                frag_mid("b"),
                TokenKind::InterpStart,
                id("y"),
                TokenKind::InterpEnd,
                frag_end("c"),
                TokenKind::Eof,
            ],
        ),
    ];

    for (src, expected) in table {
        assert_eq!(kinds(src), expected);
    }
}
1700
/// Braces that belong to the embedded expression are lexed as ordinary
/// `LBrace` / `RBrace` tokens; only the brace that balances the opening `{`
/// of the hole becomes `InterpEnd`.
#[test]
fn interpolation_with_nested_braces() {
    // Both inputs are a single hole with empty text on either side, so the
    // expected streams share the same first two and last three tokens.
    let in_interpolation = |inner: Vec<TokenKind>| {
        let mut all = vec![TokenKind::StrStart(String::new()), TokenKind::InterpStart];
        all.extend(inner);
        all.extend([
            TokenKind::InterpEnd,
            TokenKind::StrEnd(String::new()),
            TokenKind::Eof,
        ]);
        all
    };

    // A brace-delimited literal followed by a field access.
    let record_access = kinds("\"{ {a: 1}.a }\"");
    assert_eq!(
        record_access,
        in_interpolation(vec![
            TokenKind::LBrace,
            id("a"),
            TokenKind::Colon,
            TokenKind::Int(1),
            TokenKind::RBrace,
            TokenKind::Dot,
            id("a"),
        ])
    );

    // An if/else expression with two brace-delimited branches.
    let conditional = kinds("\"{ if x { 1 } else { 2 } }\"");
    assert_eq!(
        conditional,
        in_interpolation(vec![
            TokenKind::If,
            id("x"),
            TokenKind::LBrace,
            TokenKind::Int(1),
            TokenKind::RBrace,
            TokenKind::Else,
            TokenKind::LBrace,
            TokenKind::Int(2),
            TokenKind::RBrace,
        ])
    );
}
1744
/// A string literal inside an interpolation hole may itself contain a hole;
/// the inner string's tokens appear between the outer `InterpStart` and
/// `InterpEnd`.
#[test]
fn interpolation_with_a_nested_string() {
    let empty_start = || TokenKind::StrStart(String::new());
    let empty_end = || TokenKind::StrEnd(String::new());

    // Tokens of the inner string `"{inner}"`.
    let inner_string = [
        empty_start(),
        TokenKind::InterpStart,
        id("inner"),
        TokenKind::InterpEnd,
        empty_end(),
    ];

    // The outer string wraps the inner one in its own single hole.
    let mut expected = vec![empty_start(), TokenKind::InterpStart];
    expected.extend(inner_string);
    expected.extend([TokenKind::InterpEnd, empty_end(), TokenKind::Eof]);

    assert_eq!(kinds("\"{ \"{inner}\" }\""), expected);
}
1769
/// A string with two holes, the second holding a call expression:
/// parentheses in the text fragments stay text, while the parentheses of the
/// call inside the hole become `LParen` / `RParen` tokens.
#[test]
fn the_fibonacci_interpolation() {
    // First hole: `{i}`.
    let index_hole = [TokenKind::InterpStart, id("i"), TokenKind::InterpEnd];
    // Second hole: `{fibonacci(i)}`.
    let call_hole = [
        TokenKind::InterpStart,
        id("fibonacci"),
        TokenKind::LParen,
        id("i"),
        TokenKind::RParen,
        TokenKind::InterpEnd,
    ];

    let mut expected = vec![TokenKind::StrStart("fib(".to_owned())];
    expected.extend(index_hole);
    expected.push(TokenKind::StrMid(") = ".to_owned()));
    expected.extend(call_hole);
    expected.push(TokenKind::StrEnd(String::new()));
    expected.push(TokenKind::Eof);

    assert_eq!(kinds("\"fib({i}) = {fibonacci(i)}\""), expected);
}
1794
/// An interpolation hole that is still open at end of input is reported as
/// `UnterminatedInterpolation`, with the span on the one-byte `{` that
/// opened it.
#[test]
fn unterminated_interpolation_errors_at_the_brace() {
    // Lexes `src`, insists on `UnterminatedInterpolation` and hands back its span.
    let open_brace_span = |src: &str| match err(src) {
        QalaError::UnterminatedInterpolation { span } => span,
        other => panic!("expected UnterminatedInterpolation, got {other:?}"),
    };

    assert_eq!(
        open_brace_span("\"x{y"),
        Span::new(2, 1),
        "span should be the `{{` byte"
    );

    let statement = "let s = \"a{b";
    let span = open_brace_span(statement);
    assert_eq!(span.slice(statement), "{");

    // Balanced inner braces do not close the hole.
    assert!(matches!(
        err("\"{ {a: 1}.a "),
        QalaError::UnterminatedInterpolation { .. }
    ));
}
1817
/// The spans of the tokens produced for an interpolated string tile the
/// source text: the opening quote belongs to the `StrStart` span, the
/// closing quote to the `StrEnd` span, and each brace is its own one-byte
/// token.
#[test]
fn interpolation_fragment_spans_partition_the_source() {
    let src = "\"hi {x}!\"";
    let toks = Lexer::tokenize(src).unwrap();

    // Expected (kind, source text under the span) for the first five tokens.
    let pieces = [
        (TokenKind::StrStart("hi ".to_string()), "\"hi "),
        (TokenKind::InterpStart, "{"),
        (id("x"), "x"),
        (TokenKind::InterpEnd, "}"),
        (TokenKind::StrEnd("!".to_string()), "!\""),
    ];
    for (index, (kind, text)) in pieces.iter().enumerate() {
        assert_eq!(toks[index].kind, *kind);
        assert_eq!(toks[index].span.slice(src), *text);
    }

    // The stream is closed by the end-of-file marker.
    assert_eq!(toks[5].kind, TokenKind::Eof);
}
1837
/// Reads each of the six example programs from
/// `playground/public/examples` (located relative to the crate's manifest
/// directory) and checks that it tokenizes without error, ends in `Eof`,
/// and yields more than one token.
#[test]
fn the_six_examples_tokenize_to_eof_with_no_error() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let example_names = [
        "hello",
        "fibonacci",
        "effects",
        "pattern-matching",
        "pipeline",
        "defer-demo",
    ];

    for name in example_names {
        let path = format!("{manifest_dir}/../../playground/public/examples/{name}.qala");

        let src = match std::fs::read_to_string(&path) {
            Ok(text) => text,
            Err(e) => panic!("could not read example {path}: {e}"),
        };
        let toks = match Lexer::tokenize(&src) {
            Ok(tokens) => tokens,
            Err(e) => panic!("example {name}.qala failed to tokenize: {e:?}"),
        };

        let final_kind = toks.last().map(|tok| &tok.kind);
        assert_eq!(
            final_kind,
            Some(&TokenKind::Eof),
            "example {name}.qala should end in Eof"
        );
        // A stream holding only `Eof` would mean nothing was really lexed.
        assert!(
            toks.len() > 1,
            "example {name}.qala should produce real tokens"
        );
    }
}
1871
/// Tokenizing the same input twice gives equal results, both for a program
/// that lexes successfully and for one that fails.
#[test]
fn tokenizing_is_deterministic() {
    // A well-formed program: both runs must yield the same token vector.
    let program = concat!(
        "fn main() is io {\n",
        " let name = \"world\"\n",
        " println(\"hello, {name}!\")\n",
        "}\n",
    );
    let first_run = Lexer::tokenize(program).unwrap();
    let second_run = Lexer::tokenize(program).unwrap();
    assert_eq!(first_run, second_run);

    // A failing input: both runs must yield the same result value.
    let bad = r#"let x = "unterminated"#;
    let (first_failure, second_failure) = (Lexer::tokenize(bad), Lexer::tokenize(bad));
    assert_eq!(first_failure, second_failure);
}
1884}