1use crate::error::{CompileError, LexError, Result};
2use crate::intern::{InternedStr, StringInterner};
3use crate::source::{FileId, SourceLocation};
4use crate::token::{Comment, CommentKind, Token, TokenKind};
5
6pub trait IdentResolver {
12 fn resolve_ident(&mut self, s: &str) -> Option<InternedStr>;
17}
18
19pub struct Interning<'a>(pub &'a mut StringInterner);
21
22impl IdentResolver for Interning<'_> {
23 fn resolve_ident(&mut self, s: &str) -> Option<InternedStr> {
24 Some(self.0.intern(s)) }
26}
27
28pub struct LookupOnly<'a>(pub &'a StringInterner);
30
31impl IdentResolver for LookupOnly<'_> {
32 fn resolve_ident(&mut self, s: &str) -> Option<InternedStr> {
33 self.0.lookup(s) }
35}
36
37pub struct Lexer<'a, R: IdentResolver> {
39 source: &'a [u8],
40 pos: usize,
41 line: u32,
42 column: u32,
43 file_id: FileId,
44 resolver: R,
45 return_spaces: bool,
47 _marker: std::marker::PhantomData<&'a ()>,
49}
50
51pub type MutableLexer<'a> = Lexer<'a, Interning<'a>>;
53
54pub type ReadOnlyLexer<'a> = Lexer<'a, LookupOnly<'a>>;
56
57impl<'a> Lexer<'a, Interning<'a>> {
58 pub fn new(source: &'a [u8], file_id: FileId, interner: &'a mut StringInterner) -> Self {
60 Self {
61 source,
62 pos: 0,
63 line: 1,
64 column: 1,
65 file_id,
66 resolver: Interning(interner),
67 return_spaces: false,
68 _marker: std::marker::PhantomData,
69 }
70 }
71}
72
73impl<'a> Lexer<'a, LookupOnly<'a>> {
74 pub fn new_readonly(source: &'a [u8], file_id: FileId, interner: &'a StringInterner) -> Self {
79 Self {
80 source,
81 pos: 0,
82 line: 1,
83 column: 1,
84 file_id,
85 resolver: LookupOnly(interner),
86 return_spaces: false,
87 _marker: std::marker::PhantomData,
88 }
89 }
90}
91
92impl<'a, R: IdentResolver> Lexer<'a, R> {
93
94 pub fn set_return_spaces(&mut self, enabled: bool) {
96 self.return_spaces = enabled;
97 }
98
99 pub fn return_spaces(&self) -> bool {
101 self.return_spaces
102 }
103
104 pub fn current_location(&self) -> SourceLocation {
106 SourceLocation::new(self.file_id, self.line, self.column)
107 }
108
109 pub fn file_id(&self) -> FileId {
111 self.file_id
112 }
113
114 pub fn next_token(&mut self) -> Result<Token> {
116 let mut leading_comments = Vec::new();
117
118 loop {
119 if self.return_spaces {
121 if let Some(c) = self.peek() {
122 if c == b' ' || c == b'\t' {
123 let loc = self.current_location();
124 self.advance();
125 while let Some(c) = self.peek() {
127 if c == b' ' || c == b'\t' {
128 self.advance();
129 } else {
130 break;
131 }
132 }
133 return Ok(Token::with_comments(TokenKind::Space, loc, leading_comments));
134 }
135 }
136 } else {
137 self.skip_whitespace();
138 }
139
140 match (self.peek(), self.peek_n(1)) {
141 (Some(b'/'), Some(b'/')) => {
142 let comment = self.scan_line_comment();
143 leading_comments.push(comment);
144 }
145 (Some(b'/'), Some(b'*')) => {
146 let comment = self.scan_block_comment()?;
147 leading_comments.push(comment);
148 }
149 _ => break,
150 }
151 }
152
153 let loc = self.current_location();
154 let kind = self.scan_token_kind()?;
155
156 Ok(Token::with_comments(kind, loc, leading_comments))
157 }
158
159 fn peek(&self) -> Option<u8> {
161 self.source.get(self.pos).copied()
162 }
163
164 fn peek_n(&self, n: usize) -> Option<u8> {
166 self.source.get(self.pos + n).copied()
167 }
168
169 fn advance(&mut self) -> Option<u8> {
171 let c = self.peek()?;
172 self.pos += 1;
173 if c == b'\n' {
174 self.line += 1;
175 self.column = 1;
176 } else {
177 self.column += 1;
178 }
179 Some(c)
180 }
181
182 fn skip_whitespace(&mut self) {
184 while let Some(c) = self.peek() {
185 if c == b' ' || c == b'\t' || c == b'\r' {
186 self.advance();
187 } else {
188 break;
189 }
190 }
191 }
192
193 fn scan_line_comment(&mut self) -> Comment {
195 let loc = self.current_location();
196 self.advance(); self.advance(); let start = self.pos;
200 while self.peek().is_some_and(|c| c != b'\n') {
201 self.advance();
202 }
203 let text = String::from_utf8_lossy(&self.source[start..self.pos]).to_string();
204
205 Comment::new(CommentKind::Line, text, loc)
206 }
207
208 fn scan_block_comment(&mut self) -> Result<Comment> {
210 let loc = self.current_location();
211 self.advance(); self.advance(); let start = self.pos;
215 loop {
216 match (self.peek(), self.peek_n(1)) {
217 (Some(b'*'), Some(b'/')) => {
218 let text = String::from_utf8_lossy(&self.source[start..self.pos]).to_string();
219 self.advance(); self.advance(); return Ok(Comment::new(CommentKind::Block, text, loc));
222 }
223 (Some(_), _) => {
224 self.advance();
225 }
226 (None, _) => {
227 return Err(CompileError::Lex {
228 loc,
229 kind: LexError::UnterminatedComment,
230 });
231 }
232 }
233 }
234 }
235
236 fn scan_token_kind(&mut self) -> Result<TokenKind> {
238 let Some(c) = self.peek() else {
239 return Ok(TokenKind::Eof);
240 };
241
242 match c {
243 b'\n' => {
245 self.advance();
246 Ok(TokenKind::Newline)
247 }
248 b'L' if matches!(self.peek_n(1), Some(b'"') | Some(b'\'')) => {
250 self.advance(); if self.peek() == Some(b'"') {
252 self.scan_wide_string()
253 } else {
254 self.scan_wide_char()
255 }
256 }
257
258 b'a'..=b'z' | b'A'..=b'Z' | b'_' => self.scan_identifier(),
260
261 b'0'..=b'9' => self.scan_number(),
263
264 b'"' => self.scan_string(),
266
267 b'\'' => self.scan_char(),
269
270 b'+' => self.scan_plus(),
272 b'-' => self.scan_minus(),
273 b'*' => self.scan_star(),
274 b'/' => self.scan_slash(),
275 b'%' => self.scan_percent(),
276 b'&' => self.scan_amp(),
277 b'|' => self.scan_pipe(),
278 b'^' => self.scan_caret(),
279 b'~' => {
280 self.advance();
281 Ok(TokenKind::Tilde)
282 }
283 b'!' => self.scan_bang(),
284 b'<' => self.scan_lt(),
285 b'>' => self.scan_gt(),
286 b'=' => self.scan_eq(),
287 b'?' => {
288 self.advance();
289 Ok(TokenKind::Question)
290 }
291 b':' => {
292 self.advance();
293 Ok(TokenKind::Colon)
294 }
295 b'.' => self.scan_dot(),
296 b',' => {
297 self.advance();
298 Ok(TokenKind::Comma)
299 }
300 b';' => {
301 self.advance();
302 Ok(TokenKind::Semi)
303 }
304 b'(' => {
305 self.advance();
306 Ok(TokenKind::LParen)
307 }
308 b')' => {
309 self.advance();
310 Ok(TokenKind::RParen)
311 }
312 b'[' => {
313 self.advance();
314 Ok(TokenKind::LBracket)
315 }
316 b']' => {
317 self.advance();
318 Ok(TokenKind::RBracket)
319 }
320 b'{' => {
321 self.advance();
322 Ok(TokenKind::LBrace)
323 }
324 b'}' => {
325 self.advance();
326 Ok(TokenKind::RBrace)
327 }
328 b'#' => self.scan_hash(),
329
330 _ => {
331 let loc = self.current_location();
332 self.advance();
333 Err(CompileError::Lex {
334 loc,
335 kind: LexError::InvalidChar(c as char),
336 })
337 }
338 }
339 }
340
341 fn scan_identifier(&mut self) -> Result<TokenKind> {
343 let loc = self.current_location();
344 let start = self.pos;
345 while let Some(c) = self.peek() {
346 if c.is_ascii_alphanumeric() || c == b'_' {
347 self.advance();
348 } else {
349 break;
350 }
351 }
352
353 let text = std::str::from_utf8(&self.source[start..self.pos]).unwrap();
354
355 if let Some(kw) = TokenKind::from_keyword(text) {
357 Ok(kw)
358 } else {
359 match self.resolver.resolve_ident(text) {
361 Some(interned) => Ok(TokenKind::Ident(interned)),
362 None => Err(CompileError::Lex {
363 loc,
364 kind: LexError::UnknownIdentifier(text.to_string()),
365 }),
366 }
367 }
368 }
369
370 fn scan_number(&mut self) -> Result<TokenKind> {
372 let loc = self.current_location();
373 let start = self.pos;
374
375 if self.peek() == Some(b'0') {
377 self.advance();
378 match self.peek() {
379 Some(b'x') | Some(b'X') => return self.scan_hex_number(start, loc),
380 Some(b'b') | Some(b'B') => return self.scan_binary_number(start, loc),
381 Some(b'0'..=b'7') => return self.scan_octal_number(start, loc),
382 Some(b'.') | Some(b'e') | Some(b'E') => {
383 return self.scan_float_number(start, loc);
385 }
386 _ => {
387 return self.finish_integer(start, loc);
389 }
390 }
391 }
392
393 while self.peek().is_some_and(|c| c.is_ascii_digit()) {
395 self.advance();
396 }
397
398 if matches!(self.peek(), Some(b'.') | Some(b'e') | Some(b'E')) {
400 return self.scan_float_number(start, loc);
401 }
402
403 self.finish_integer(start, loc)
404 }
405
406 fn scan_hex_number(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind> {
408 self.advance(); let hex_start = self.pos;
411 while self.peek().is_some_and(|c| c.is_ascii_hexdigit()) {
412 self.advance();
413 }
414
415 if self.pos == hex_start {
416 return Err(CompileError::Lex {
417 loc,
418 kind: LexError::InvalidNumber("0x".to_string()),
419 });
420 }
421
422 self.finish_integer(start, loc)
423 }
424
425 fn scan_binary_number(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind> {
427 self.advance(); let bin_start = self.pos;
430 while matches!(self.peek(), Some(b'0') | Some(b'1')) {
431 self.advance();
432 }
433
434 if self.pos == bin_start {
435 return Err(CompileError::Lex {
436 loc,
437 kind: LexError::InvalidNumber("0b".to_string()),
438 });
439 }
440
441 self.finish_integer(start, loc)
442 }
443
444 fn scan_octal_number(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind> {
446 while self.peek().is_some_and(|c| matches!(c, b'0'..=b'7')) {
447 self.advance();
448 }
449
450 self.finish_integer(start, loc)
451 }
452
453 fn scan_float_number(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind> {
455 if self.peek() == Some(b'.') {
457 self.advance();
458 while self.peek().is_some_and(|c| c.is_ascii_digit()) {
459 self.advance();
460 }
461 }
462
463 if matches!(self.peek(), Some(b'e') | Some(b'E')) {
465 self.advance();
466 if matches!(self.peek(), Some(b'+') | Some(b'-')) {
467 self.advance();
468 }
469 while self.peek().is_some_and(|c| c.is_ascii_digit()) {
470 self.advance();
471 }
472 }
473
474 let _is_float = matches!(self.peek(), Some(b'f') | Some(b'F'));
476 let _is_long = matches!(self.peek(), Some(b'l') | Some(b'L'));
477 if _is_float || _is_long {
478 self.advance();
479 }
480
481 let text = std::str::from_utf8(&self.source[start..self.pos]).unwrap();
482 let value: f64 = text
483 .trim_end_matches(|c| c == 'f' || c == 'F' || c == 'l' || c == 'L')
484 .parse()
485 .map_err(|_| CompileError::Lex {
486 loc: loc.clone(),
487 kind: LexError::InvalidNumber(text.to_string()),
488 })?;
489
490 Ok(TokenKind::FloatLit(value))
491 }
492
493 fn finish_integer(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind> {
495 let mut is_unsigned = false;
497 let mut is_long = false;
498 let mut is_longlong = false;
499
500 loop {
501 match self.peek() {
502 Some(b'u') | Some(b'U') => {
503 is_unsigned = true;
504 self.advance();
505 }
506 Some(b'l') | Some(b'L') => {
507 if is_long {
508 is_longlong = true;
509 }
510 is_long = true;
511 self.advance();
512 }
513 _ => break,
514 }
515 }
516
517 let text = std::str::from_utf8(&self.source[start..self.pos]).unwrap();
518 let num_text = text
519 .trim_start_matches("0x")
520 .trim_start_matches("0X")
521 .trim_start_matches("0b")
522 .trim_start_matches("0B")
523 .trim_end_matches(|c: char| c == 'u' || c == 'U' || c == 'l' || c == 'L');
524
525 let radix = if text.starts_with("0x") || text.starts_with("0X") {
526 16
527 } else if text.starts_with("0b") || text.starts_with("0B") {
528 2
529 } else if text.starts_with('0') && text.len() > 1 && !text.contains('.') {
530 8
531 } else {
532 10
533 };
534
535 if is_unsigned || is_longlong {
536 let value = u64::from_str_radix(num_text, radix).map_err(|_| CompileError::Lex {
537 loc: loc.clone(),
538 kind: LexError::InvalidNumber(text.to_string()),
539 })?;
540 Ok(TokenKind::UIntLit(value))
541 } else {
542 let value = i64::from_str_radix(num_text, radix).map_err(|_| CompileError::Lex {
543 loc: loc.clone(),
544 kind: LexError::InvalidNumber(text.to_string()),
545 })?;
546 Ok(TokenKind::IntLit(value))
547 }
548 }
549
550 fn scan_string(&mut self) -> Result<TokenKind> {
552 let loc = self.current_location();
553 self.advance(); let mut bytes = Vec::new();
556 loop {
557 match self.peek() {
558 Some(b'"') => {
559 self.advance();
560 return Ok(TokenKind::StringLit(bytes));
561 }
562 Some(b'\\') => {
563 self.advance();
564 let escaped = self.scan_escape_sequence(&loc)?;
565 bytes.push(escaped);
566 }
567 Some(b'\n') | None => {
568 return Err(CompileError::Lex {
569 loc,
570 kind: LexError::UnterminatedString,
571 });
572 }
573 Some(c) => {
574 self.advance();
575 bytes.push(c);
576 }
577 }
578 }
579 }
580
581 fn scan_wide_string(&mut self) -> Result<TokenKind> {
583 let loc = self.current_location();
584 self.advance(); let mut chars = Vec::new();
587 loop {
588 match self.peek() {
589 Some(b'"') => {
590 self.advance();
591 return Ok(TokenKind::WideStringLit(chars));
592 }
593 Some(b'\\') => {
594 self.advance();
595 let escaped = self.scan_escape_sequence(&loc)?;
596 chars.push(escaped as u32);
597 }
598 Some(b'\n') | None => {
599 return Err(CompileError::Lex {
600 loc,
601 kind: LexError::UnterminatedString,
602 });
603 }
604 Some(c) => {
605 self.advance();
606 chars.push(c as u32);
607 }
608 }
609 }
610 }
611
612 fn scan_char(&mut self) -> Result<TokenKind> {
614 let loc = self.current_location();
615 self.advance(); let value = match self.peek() {
618 Some(b'\'') => {
619 return Err(CompileError::Lex {
620 loc,
621 kind: LexError::EmptyCharLit,
622 });
623 }
624 Some(b'\\') => {
625 self.advance();
626 self.scan_escape_sequence(&loc)?
627 }
628 Some(c) => {
629 self.advance();
630 c
631 }
632 None => {
633 return Err(CompileError::Lex {
634 loc,
635 kind: LexError::UnterminatedChar,
636 });
637 }
638 };
639
640 if self.peek() != Some(b'\'') {
641 return Err(CompileError::Lex {
642 loc,
643 kind: LexError::UnterminatedChar,
644 });
645 }
646 self.advance(); Ok(TokenKind::CharLit(value))
649 }
650
651 fn scan_wide_char(&mut self) -> Result<TokenKind> {
653 let loc = self.current_location();
654 self.advance(); let value = match self.peek() {
657 Some(b'\'') => {
658 return Err(CompileError::Lex {
659 loc,
660 kind: LexError::EmptyCharLit,
661 });
662 }
663 Some(b'\\') => {
664 self.advance();
665 self.scan_escape_sequence(&loc)? as u32
666 }
667 Some(c) => {
668 self.advance();
669 c as u32
670 }
671 None => {
672 return Err(CompileError::Lex {
673 loc,
674 kind: LexError::UnterminatedChar,
675 });
676 }
677 };
678
679 if self.peek() != Some(b'\'') {
680 return Err(CompileError::Lex {
681 loc,
682 kind: LexError::UnterminatedChar,
683 });
684 }
685 self.advance(); Ok(TokenKind::WideCharLit(value))
688 }
689
690 fn scan_escape_sequence(&mut self, loc: &SourceLocation) -> Result<u8> {
692 match self.peek() {
693 Some(b'n') => {
694 self.advance();
695 Ok(b'\n')
696 }
697 Some(b't') => {
698 self.advance();
699 Ok(b'\t')
700 }
701 Some(b'r') => {
702 self.advance();
703 Ok(b'\r')
704 }
705 Some(b'\\') => {
706 self.advance();
707 Ok(b'\\')
708 }
709 Some(b'\'') => {
710 self.advance();
711 Ok(b'\'')
712 }
713 Some(b'"') => {
714 self.advance();
715 Ok(b'"')
716 }
717 Some(b'0') => {
718 self.advance();
719 Ok(0)
720 }
721 Some(b'a') => {
722 self.advance();
723 Ok(0x07) }
725 Some(b'b') => {
726 self.advance();
727 Ok(0x08) }
729 Some(b'f') => {
730 self.advance();
731 Ok(0x0C) }
733 Some(b'v') => {
734 self.advance();
735 Ok(0x0B) }
737 Some(b'x') => {
738 self.advance();
739 self.scan_hex_escape(loc)
740 }
741 Some(c @ b'0'..=b'7') => self.scan_octal_escape(c),
742 Some(c) => Err(CompileError::Lex {
743 loc: loc.clone(),
744 kind: LexError::InvalidEscape(c as char),
745 }),
746 None => Err(CompileError::Lex {
747 loc: loc.clone(),
748 kind: LexError::UnterminatedString,
749 }),
750 }
751 }
752
753 fn scan_hex_escape(&mut self, loc: &SourceLocation) -> Result<u8> {
755 let mut value = 0u8;
756 let mut count = 0;
757
758 while let Some(c) = self.peek() {
759 if let Some(digit) = (c as char).to_digit(16) {
760 value = value.wrapping_mul(16).wrapping_add(digit as u8);
761 self.advance();
762 count += 1;
763 if count >= 2 {
764 break;
765 }
766 } else {
767 break;
768 }
769 }
770
771 if count == 0 {
772 return Err(CompileError::Lex {
773 loc: loc.clone(),
774 kind: LexError::InvalidEscape('x'),
775 });
776 }
777
778 Ok(value)
779 }
780
781 fn scan_octal_escape(&mut self, first: u8) -> Result<u8> {
783 let mut value = (first - b'0') as u8;
784 self.advance();
785
786 for _ in 0..2 {
787 if let Some(c @ b'0'..=b'7') = self.peek() {
788 value = value * 8 + (c - b'0');
789 self.advance();
790 } else {
791 break;
792 }
793 }
794
795 Ok(value)
796 }
797
798 fn scan_plus(&mut self) -> Result<TokenKind> {
801 self.advance();
802 match self.peek() {
803 Some(b'+') => {
804 self.advance();
805 Ok(TokenKind::PlusPlus)
806 }
807 Some(b'=') => {
808 self.advance();
809 Ok(TokenKind::PlusEq)
810 }
811 _ => Ok(TokenKind::Plus),
812 }
813 }
814
815 fn scan_minus(&mut self) -> Result<TokenKind> {
816 self.advance();
817 match self.peek() {
818 Some(b'-') => {
819 self.advance();
820 Ok(TokenKind::MinusMinus)
821 }
822 Some(b'=') => {
823 self.advance();
824 Ok(TokenKind::MinusEq)
825 }
826 Some(b'>') => {
827 self.advance();
828 Ok(TokenKind::Arrow)
829 }
830 _ => Ok(TokenKind::Minus),
831 }
832 }
833
834 fn scan_star(&mut self) -> Result<TokenKind> {
835 self.advance();
836 if self.peek() == Some(b'=') {
837 self.advance();
838 Ok(TokenKind::StarEq)
839 } else {
840 Ok(TokenKind::Star)
841 }
842 }
843
844 fn scan_slash(&mut self) -> Result<TokenKind> {
845 self.advance();
846 if self.peek() == Some(b'=') {
847 self.advance();
848 Ok(TokenKind::SlashEq)
849 } else {
850 Ok(TokenKind::Slash)
851 }
852 }
853
854 fn scan_percent(&mut self) -> Result<TokenKind> {
855 self.advance();
856 if self.peek() == Some(b'=') {
857 self.advance();
858 Ok(TokenKind::PercentEq)
859 } else {
860 Ok(TokenKind::Percent)
861 }
862 }
863
864 fn scan_amp(&mut self) -> Result<TokenKind> {
865 self.advance();
866 match self.peek() {
867 Some(b'&') => {
868 self.advance();
869 Ok(TokenKind::AmpAmp)
870 }
871 Some(b'=') => {
872 self.advance();
873 Ok(TokenKind::AmpEq)
874 }
875 _ => Ok(TokenKind::Amp),
876 }
877 }
878
879 fn scan_pipe(&mut self) -> Result<TokenKind> {
880 self.advance();
881 match self.peek() {
882 Some(b'|') => {
883 self.advance();
884 Ok(TokenKind::PipePipe)
885 }
886 Some(b'=') => {
887 self.advance();
888 Ok(TokenKind::PipeEq)
889 }
890 _ => Ok(TokenKind::Pipe),
891 }
892 }
893
894 fn scan_caret(&mut self) -> Result<TokenKind> {
895 self.advance();
896 if self.peek() == Some(b'=') {
897 self.advance();
898 Ok(TokenKind::CaretEq)
899 } else {
900 Ok(TokenKind::Caret)
901 }
902 }
903
904 fn scan_bang(&mut self) -> Result<TokenKind> {
905 self.advance();
906 if self.peek() == Some(b'=') {
907 self.advance();
908 Ok(TokenKind::BangEq)
909 } else {
910 Ok(TokenKind::Bang)
911 }
912 }
913
914 fn scan_lt(&mut self) -> Result<TokenKind> {
915 self.advance();
916 match self.peek() {
917 Some(b'<') => {
918 self.advance();
919 if self.peek() == Some(b'=') {
920 self.advance();
921 Ok(TokenKind::LtLtEq)
922 } else {
923 Ok(TokenKind::LtLt)
924 }
925 }
926 Some(b'=') => {
927 self.advance();
928 Ok(TokenKind::LtEq)
929 }
930 _ => Ok(TokenKind::Lt),
931 }
932 }
933
934 fn scan_gt(&mut self) -> Result<TokenKind> {
935 self.advance();
936 match self.peek() {
937 Some(b'>') => {
938 self.advance();
939 if self.peek() == Some(b'=') {
940 self.advance();
941 Ok(TokenKind::GtGtEq)
942 } else {
943 Ok(TokenKind::GtGt)
944 }
945 }
946 Some(b'=') => {
947 self.advance();
948 Ok(TokenKind::GtEq)
949 }
950 _ => Ok(TokenKind::Gt),
951 }
952 }
953
954 fn scan_eq(&mut self) -> Result<TokenKind> {
955 self.advance();
956 if self.peek() == Some(b'=') {
957 self.advance();
958 Ok(TokenKind::EqEq)
959 } else {
960 Ok(TokenKind::Eq)
961 }
962 }
963
964 fn scan_dot(&mut self) -> Result<TokenKind> {
965 self.advance();
966 if self.peek() == Some(b'.') && self.peek_n(1) == Some(b'.') {
967 self.advance();
968 self.advance();
969 Ok(TokenKind::Ellipsis)
970 } else {
971 Ok(TokenKind::Dot)
972 }
973 }
974
975 fn scan_hash(&mut self) -> Result<TokenKind> {
976 self.advance();
977 if self.peek() == Some(b'#') {
978 self.advance();
979 Ok(TokenKind::HashHash)
980 } else {
981 Ok(TokenKind::Hash)
982 }
983 }
984}
985
986#[cfg(test)]
987mod tests {
988 use super::*;
989
990 fn lex(source: &str) -> Vec<TokenKind> {
991 let mut interner = StringInterner::new();
992 let mut lexer = Lexer::new(source.as_bytes(), FileId::default(), &mut interner);
993 let mut tokens = Vec::new();
994 loop {
995 let token = lexer.next_token().unwrap();
996 if matches!(token.kind, TokenKind::Eof) {
997 break;
998 }
999 tokens.push(token.kind);
1000 }
1001 tokens
1002 }
1003
/// Space-separated operators lex to the matching token kinds, longest match first.
#[test]
fn test_operators() {
    let expected = vec![
        TokenKind::Plus,
        TokenKind::Minus,
        TokenKind::Star,
        TokenKind::Slash,
        TokenKind::Percent,
        TokenKind::PlusPlus,
        TokenKind::MinusMinus,
        TokenKind::PlusEq,
        TokenKind::MinusEq,
        TokenKind::Arrow,
        TokenKind::EqEq,
        TokenKind::BangEq,
        TokenKind::LtEq,
        TokenKind::GtEq,
    ];

    let actual = lex("+ - * / % ++ -- += -= -> == != <= >=");

    assert_eq!(actual, expected);
}
1027
/// Keywords map to their dedicated kinds; any other word becomes an interned identifier.
#[test]
fn test_keywords_and_identifiers() {
    let mut interner = StringInterner::new();
    let mut lexer = Lexer::new(
        b"int if else while for return struct foo",
        FileId::default(),
        &mut interner,
    );

    let mut tokens = Vec::new();
    loop {
        match lexer.next_token().unwrap().kind {
            TokenKind::Eof => break,
            kind => tokens.push(kind),
        }
    }

    let keywords = [
        TokenKind::KwInt,
        TokenKind::KwIf,
        TokenKind::KwElse,
        TokenKind::KwWhile,
        TokenKind::KwFor,
        TokenKind::KwReturn,
        TokenKind::KwStruct,
    ];
    assert_eq!(&tokens[..keywords.len()], &keywords[..]);

    // The trailing word is not a keyword, so it must come back as an identifier.
    match tokens[7] {
        TokenKind::Ident(id) => assert_eq!(interner.get(id), "foo"),
        _ => panic!("Expected Ident for 'foo'"),
    }
}
1062
/// Decimal, hexadecimal, binary, octal and floating-point literals lex to their values.
///
/// The float sample is `2.5` rather than `3.14`: the latter trips Clippy's
/// deny-by-default `approx_constant` lint. A bare `0` and an unsigned suffix are
/// covered as well.
#[test]
fn test_numbers() {
    let tokens = lex("42 0x1F 0b101 0777 2.5 1e10 0 42u");
    assert_eq!(
        tokens,
        vec![
            TokenKind::IntLit(42),
            TokenKind::IntLit(0x1F),
            TokenKind::IntLit(0b101),
            TokenKind::IntLit(0o777),
            TokenKind::FloatLit(2.5),
            TokenKind::FloatLit(1e10),
            TokenKind::IntLit(0),
            TokenKind::UIntLit(42),
        ]
    );
}
1078
/// String literals yield their raw bytes, with `\n` decoded to a newline byte.
#[test]
fn test_strings() {
    let expected = vec![
        TokenKind::StringLit(b"hello".to_vec()),
        TokenKind::StringLit(b"world\n".to_vec()),
    ];

    let actual = lex(r#""hello" "world\n""#);

    assert_eq!(actual, expected);
}
1090
/// Comments are attached to the token that follows them.
#[test]
fn test_comments() {
    let mut interner = StringInterner::new();
    let mut lexer = Lexer::new(
        b"// line comment\n42 /* block */ 100",
        FileId::default(),
        &mut interner,
    );

    // The line comment ends before the newline, so the Newline token carries it.
    let after_line_comment = lexer.next_token().unwrap();
    assert_eq!(after_line_comment.kind, TokenKind::Newline);
    assert_eq!(after_line_comment.leading_comments.len(), 1);
    assert_eq!(after_line_comment.leading_comments[0].kind, CommentKind::Line);

    let forty_two = lexer.next_token().unwrap();
    assert_eq!(forty_two.kind, TokenKind::IntLit(42));

    // The block comment sits in front of `100`.
    let hundred = lexer.next_token().unwrap();
    assert_eq!(hundred.kind, TokenKind::IntLit(100));
    assert_eq!(hundred.leading_comments.len(), 1);
    assert_eq!(hundred.leading_comments[0].kind, CommentKind::Block);
}
1114
/// Three consecutive dots lex as a single `Ellipsis` token.
#[test]
fn test_ellipsis() {
    let expected = vec![TokenKind::Ellipsis];
    assert_eq!(lex("..."), expected);
}
1120}