// lumen_compiler/compiler/lexer.rs
1//! Indentation-aware lexer for Lumen source code.
2
3use crate::compiler::tokens::{Span, Token, TokenKind};
4use thiserror::Error;
5
/// Errors produced while lexing Lumen source.
///
/// Line numbers in every variant are already adjusted by the lexer's
/// `base_line` offset, so they refer to the enclosing file rather than
/// the sub-slice being lexed.
#[derive(Debug, Error)]
pub enum LexError {
    /// A character that cannot start any token.
    #[error("unexpected character '{ch}' at line {line}, col {col}")]
    UnexpectedChar { ch: char, line: usize, col: usize },
    /// A string or bytes literal still open at end of line/input.
    #[error("unterminated string at line {line}, col {col}")]
    UnterminatedString { line: usize, col: usize },
    /// A dedent that does not land on any level on the indent stack.
    #[error("inconsistent indentation at line {line}")]
    InconsistentIndent { line: usize },
    /// A numeric literal that could not be parsed (no digits, overflow, …).
    #[error("invalid number at line {line}, col {col}")]
    InvalidNumber { line: usize, col: usize },
    /// A `b"…"` literal containing something other than hex digit pairs.
    #[error("invalid bytes literal at line {line}, col {col}")]
    InvalidBytesLiteral { line: usize, col: usize },
    /// A malformed `\u{…}` or `\xNN` escape sequence.
    #[error("invalid unicode escape at line {line}, col {col}")]
    InvalidUnicodeEscape { line: usize, col: usize },
}
21
/// Indentation-aware lexer state.
pub struct Lexer {
    // Source text as chars for O(1) positional lookahead.
    source: Vec<char>,
    // Index of the next char in `source`.
    pos: usize,
    // 1-based line within the slice being lexed.
    line: usize,
    // 1-based column within the current line.
    col: usize,
    // Byte offset within the slice (chars may be multi-byte).
    byte_offset: usize,
    // Line number of the slice's first line in the enclosing file.
    base_line: usize,
    // Byte offset of the slice's start in the enclosing file.
    base_offset: usize,
    // Stack of active indentation widths; the bottom entry is 0.
    indent_stack: Vec<usize>,
    // Indent/Dedent tokens queued for emission.
    pending: Vec<Token>,
    // True at the very start and immediately after each newline.
    at_line_start: bool,
}
34
35impl Lexer {
    /// Create a lexer over `source`.
    ///
    /// `base_line` and `base_offset` locate `source` within a larger
    /// file; they are added to every reported line number and span
    /// offset so diagnostics point at the original file.
    pub fn new(source: &str, base_line: usize, base_offset: usize) -> Self {
        Self {
            source: source.chars().collect(),
            pos: 0,
            line: 1,
            col: 1,
            byte_offset: 0,
            base_line,
            base_offset,
            // The implicit top-level indentation has width 0.
            indent_stack: vec![0],
            pending: Vec::new(),
            at_line_start: true,
        }
    }
50
51    fn current(&self) -> Option<char> {
52        self.source.get(self.pos).copied()
53    }
54    fn peek(&self) -> Option<char> {
55        self.source.get(self.pos + 1).copied()
56    }
57    fn peek2(&self) -> Option<char> {
58        self.source.get(self.pos + 2).copied()
59    }
60
61    fn looks_like_interpolation_start(&self) -> bool {
62        let mut i = self.pos + 1; // after '{'
63        while let Some(ch) = self.source.get(i).copied() {
64            if ch.is_whitespace() {
65                i += 1;
66                continue;
67            }
68            return matches!(
69                ch,
70                'a'..='z' | 'A'..='Z' | '_' | '(' | '[' | '-' | '0'..='9'
71            );
72        }
73        false
74    }
75
76    fn advance(&mut self) -> Option<char> {
77        let ch = self.source.get(self.pos).copied()?;
78        self.pos += 1;
79        self.byte_offset += ch.len_utf8();
80        if ch == '\n' {
81            self.line += 1;
82            self.col = 1;
83            self.at_line_start = true;
84        } else {
85            self.col += 1;
86        }
87        Some(ch)
88    }
89
90    fn span_here(&self) -> Span {
91        Span::new(
92            self.base_offset + self.byte_offset,
93            self.base_offset + self.byte_offset,
94            self.base_line + self.line - 1,
95            self.col,
96        )
97    }
98
99    fn span_from(&self, so: usize, sl: usize, sc: usize) -> Span {
100        Span::new(
101            self.base_offset + so,
102            self.base_offset + self.byte_offset,
103            self.base_line + sl - 1,
104            sc,
105        )
106    }
107
108    fn handle_indentation(&mut self) -> Result<(), LexError> {
109        let mut indent = 0;
110        while let Some(ch) = self.current() {
111            match ch {
112                ' ' => {
113                    indent += 1;
114                    self.advance();
115                }
116                '\t' => {
117                    indent += 2;
118                    self.advance();
119                }
120                _ => break,
121            }
122        }
123        if matches!(self.current(), None | Some('\n') | Some('#')) {
124            if self.current().is_none() {
125                while self.indent_stack.len() > 1 {
126                    self.indent_stack.pop();
127                    self.pending
128                        .push(Token::new(TokenKind::Dedent, self.span_here()));
129                }
130            }
131            return Ok(());
132        }
133        let cur = *self.indent_stack.last().unwrap();
134        if indent > cur {
135            self.indent_stack.push(indent);
136            self.pending
137                .push(Token::new(TokenKind::Indent, self.span_here()));
138        } else if indent < cur {
139            while let Some(&top) = self.indent_stack.last() {
140                if top > indent {
141                    self.indent_stack.pop();
142                    self.pending
143                        .push(Token::new(TokenKind::Dedent, self.span_here()));
144                } else {
145                    break;
146                }
147            }
148            if *self.indent_stack.last().unwrap() != indent {
149                return Err(LexError::InconsistentIndent {
150                    line: self.base_line + self.line - 1,
151                });
152            }
153        }
154        Ok(())
155    }
156
157    /// Read a unicode escape sequence \u{XXXX}
158    fn read_unicode_escape(&mut self, sl: usize, sc: usize) -> Result<char, LexError> {
159        // We've consumed \u, now expect {
160        if self.current() != Some('{') {
161            return Err(LexError::InvalidUnicodeEscape {
162                line: self.base_line + sl - 1,
163                col: sc,
164            });
165        }
166        self.advance(); // skip {
167        let mut hex = String::new();
168        while let Some(c) = self.current() {
169            if c == '}' {
170                break;
171            }
172            hex.push(c);
173            self.advance();
174        }
175        if self.current() != Some('}') {
176            return Err(LexError::InvalidUnicodeEscape {
177                line: self.base_line + sl - 1,
178                col: sc,
179            });
180        }
181        self.advance(); // skip }
182        u32::from_str_radix(&hex, 16)
183            .ok()
184            .and_then(char::from_u32)
185            .ok_or(LexError::InvalidUnicodeEscape {
186                line: self.base_line + sl - 1,
187                col: sc,
188            })
189    }
190
191    /// Process an escape sequence after consuming the backslash
192    fn process_escape(&mut self, buf: &mut String, sl: usize, sc: usize) -> Result<(), LexError> {
193        match self.current() {
194            Some('n') => {
195                buf.push('\n');
196                self.advance();
197            }
198            Some('t') => {
199                buf.push('\t');
200                self.advance();
201            }
202            Some('r') => {
203                buf.push('\r');
204                self.advance();
205            }
206            Some('\\') => {
207                buf.push('\\');
208                self.advance();
209            }
210            Some('"') => {
211                buf.push('"');
212                self.advance();
213            }
214            Some('{') => {
215                buf.push('{');
216                self.advance();
217            }
218            Some('0') => {
219                buf.push('\0');
220                self.advance();
221            }
222            Some('u') => {
223                self.advance(); // skip u
224                let ch = self.read_unicode_escape(sl, sc)?;
225                buf.push(ch);
226            }
227            Some('x') => {
228                self.advance(); // skip x
229                let mut hex = String::new();
230                for _ in 0..2 {
231                    match self.current() {
232                        Some(c) if c.is_ascii_hexdigit() => {
233                            hex.push(c);
234                            self.advance();
235                        }
236                        _ => {
237                            return Err(LexError::InvalidUnicodeEscape {
238                                line: self.base_line + sl - 1,
239                                col: sc,
240                            })
241                        }
242                    }
243                }
244                let byte =
245                    u8::from_str_radix(&hex, 16).map_err(|_| LexError::InvalidUnicodeEscape {
246                        line: self.base_line + sl - 1,
247                        col: sc,
248                    })?;
249                buf.push(byte as char);
250            }
251            Some(c) => {
252                buf.push('\\');
253                buf.push(c);
254                self.advance();
255            }
256            None => {
257                return Err(LexError::UnterminatedString {
258                    line: self.base_line + sl - 1,
259                    col: sc,
260                })
261            }
262        }
263        Ok(())
264    }
265
    /// Lex a `"""…"""` string, which may span multiple lines.
    ///
    /// Supports escape sequences and `{expr}` interpolation. The
    /// literal text is dedented (common leading whitespace stripped)
    /// after scanning. Returns `StringInterpLit` when at least one
    /// interpolation was seen, otherwise `StringLit`.
    fn read_triple_quoted_string(&mut self) -> Result<Token, LexError> {
        let (so, sl, sc) = (self.byte_offset, self.line, self.col);
        // Skip the three opening quotes
        self.advance();
        self.advance();
        self.advance();

        // (is_expression, text) pairs; literal runs alternate with raw
        // interpolated-expression sources.
        let mut segments = Vec::new();
        let mut cur_segment = String::new();
        let mut is_interp = false;

        loop {
            match self.current() {
                None => {
                    return Err(LexError::UnterminatedString {
                        line: self.base_line + sl - 1,
                        col: sc,
                    })
                }
                // Closing `"""` terminates the literal.
                Some('"') if self.peek() == Some('"') && self.peek2() == Some('"') => {
                    self.advance();
                    self.advance();
                    self.advance();
                    break;
                }
                Some('\\') => {
                    self.advance();
                    self.process_escape(&mut cur_segment, sl, sc)?;
                }
                // `{` that plausibly starts an expression: capture the raw
                // expression text up to the matching `}`.
                Some('{') if self.looks_like_interpolation_start() => {
                    is_interp = true;
                    if !cur_segment.is_empty() {
                        segments.push((false, cur_segment.clone()));
                        cur_segment.clear();
                    }
                    self.advance(); // skip {
                    let mut expr_str = String::new();
                    let mut brace_balance = 1;
                    while let Some(c) = self.current() {
                        if c == '}' {
                            brace_balance -= 1;
                            if brace_balance == 0 {
                                break;
                            }
                            expr_str.push(c);
                            self.advance();
                        } else if c == '{' {
                            brace_balance += 1;
                            expr_str.push(c);
                            self.advance();
                        } else if c == '"' {
                            // Consume a nested string literal wholesale so
                            // braces inside it don't affect the balance.
                            expr_str.push(c);
                            self.advance();
                            while let Some(ic) = self.current() {
                                expr_str.push(ic);
                                self.advance();
                                if ic == '"' && !expr_str.ends_with("\\\"") {
                                    break;
                                }
                            }
                        } else {
                            expr_str.push(c);
                            self.advance();
                        }
                    }
                    // Loop ended without reaching balance: ran off the input.
                    if brace_balance != 0 {
                        return Err(LexError::UnterminatedString {
                            line: self.base_line + sl - 1,
                            col: sc,
                        });
                    }
                    self.advance(); // skip }
                    segments.push((true, expr_str.trim().to_string()));
                }
                // A `{` that doesn't look like interpolation is literal text.
                Some('{') => {
                    cur_segment.push('{');
                    self.advance();
                }
                Some(c) => {
                    cur_segment.push(c);
                    self.advance();
                }
            }
        }

        // Dedent: strip common leading whitespace
        let raw_content = if is_interp {
            if !cur_segment.is_empty() {
                segments.push((false, cur_segment));
            }
            // For interpolated triple-quoted, apply dedent to text segments
            self.dedent_interp_segments(&mut segments);
            let span = self.span_from(so, sl, sc);
            return Ok(Token::new(TokenKind::StringInterpLit(segments), span));
        } else {
            cur_segment
        };

        let dedented = self.dedent_string(&raw_content);
        let span = self.span_from(so, sl, sc);
        Ok(Token::new(TokenKind::StringLit(dedented), span))
    }
368
369    fn dedent_string(&self, s: &str) -> String {
370        let lines: Vec<&str> = s.split('\n').collect();
371        if lines.len() <= 1 {
372            return s.to_string();
373        }
374        // Find minimum indentation of non-empty lines (skip first line which follows """)
375        let min_indent = lines
376            .iter()
377            .skip(1)
378            .filter(|l| !l.trim().is_empty())
379            .map(|l| l.len() - l.trim_start().len())
380            .min()
381            .unwrap_or(0);
382
383        let mut result = Vec::new();
384        for (i, line) in lines.iter().enumerate() {
385            if i == 0 {
386                result.push(*line);
387            } else if line.len() >= min_indent {
388                result.push(&line[min_indent..]);
389            } else {
390                result.push(line.trim());
391            }
392        }
393        // Trim leading/trailing empty lines from the result
394        let joined = result.join("\n");
395        let trimmed = joined.trim_start_matches('\n');
396        let trimmed = trimmed.trim_end_matches('\n');
397        trimmed.to_string()
398    }
399
400    fn dedent_interp_segments(&self, segments: &mut [(bool, String)]) {
401        // Apply dedent to text-only segments
402        for seg in segments.iter_mut() {
403            if !seg.0 {
404                seg.1 = self.dedent_string(&seg.1);
405            }
406        }
407    }
408
409    fn read_raw_string(&mut self) -> Result<Token, LexError> {
410        let (so, sl, sc) = (self.byte_offset, self.line, self.col);
411        self.advance(); // skip 'r'
412                        // Check for triple-quoted raw: r"""..."""
413        if self.current() == Some('"') && self.peek() == Some('"') && self.peek2() == Some('"') {
414            self.advance();
415            self.advance();
416            self.advance(); // skip """
417            let mut content = String::new();
418            loop {
419                match self.current() {
420                    None => {
421                        return Err(LexError::UnterminatedString {
422                            line: self.base_line + sl - 1,
423                            col: sc,
424                        })
425                    }
426                    Some('"') if self.peek() == Some('"') && self.peek2() == Some('"') => {
427                        self.advance();
428                        self.advance();
429                        self.advance();
430                        break;
431                    }
432                    Some(c) => {
433                        content.push(c);
434                        self.advance();
435                    }
436                }
437            }
438            let dedented = self.dedent_string(&content);
439            let span = self.span_from(so, sl, sc);
440            return Ok(Token::new(TokenKind::RawStringLit(dedented), span));
441        }
442        // Regular raw string: r"..."
443        if self.current() != Some('"') {
444            // Not a raw string, it's an identifier starting with 'r'
445            // Put back by not advancing and let read_ident handle it
446            // Actually we already advanced past 'r', so we need to handle this differently
447            // This shouldn't happen as we check for '"' before calling read_raw_string
448            return Err(LexError::UnexpectedChar {
449                ch: self.current().unwrap_or(' '),
450                line: self.base_line + sl - 1,
451                col: sc,
452            });
453        }
454        self.advance(); // skip opening "
455        let mut content = String::new();
456        loop {
457            match self.current() {
458                None | Some('\n') => {
459                    return Err(LexError::UnterminatedString {
460                        line: self.base_line + sl - 1,
461                        col: sc,
462                    })
463                }
464                Some('"') => {
465                    self.advance();
466                    break;
467                }
468                Some(c) => {
469                    content.push(c);
470                    self.advance();
471                }
472            }
473        }
474        let span = self.span_from(so, sl, sc);
475        Ok(Token::new(TokenKind::RawStringLit(content), span))
476    }
477
478    fn read_bytes_literal(&mut self) -> Result<Token, LexError> {
479        let (so, sl, sc) = (self.byte_offset, self.line, self.col);
480        self.advance(); // skip 'b'
481        if self.current() != Some('"') {
482            return Err(LexError::InvalidBytesLiteral {
483                line: self.base_line + sl - 1,
484                col: sc,
485            });
486        }
487        self.advance(); // skip opening "
488        let mut bytes = Vec::new();
489        loop {
490            match self.current() {
491                None | Some('\n') => {
492                    return Err(LexError::UnterminatedString {
493                        line: self.base_line + sl - 1,
494                        col: sc,
495                    })
496                }
497                Some('"') => {
498                    self.advance();
499                    break;
500                }
501                Some(c) if c.is_ascii_hexdigit() => {
502                    let hi = c;
503                    self.advance();
504                    match self.current() {
505                        Some(lo) if lo.is_ascii_hexdigit() => {
506                            self.advance();
507                            let byte =
508                                u8::from_str_radix(&format!("{}{}", hi, lo), 16).map_err(|_| {
509                                    LexError::InvalidBytesLiteral {
510                                        line: self.base_line + sl - 1,
511                                        col: sc,
512                                    }
513                                })?;
514                            bytes.push(byte);
515                        }
516                        _ => {
517                            return Err(LexError::InvalidBytesLiteral {
518                                line: self.base_line + sl - 1,
519                                col: sc,
520                            })
521                        }
522                    }
523                }
524                _ => {
525                    return Err(LexError::InvalidBytesLiteral {
526                        line: self.base_line + sl - 1,
527                        col: sc,
528                    })
529                }
530            }
531        }
532        let span = self.span_from(so, sl, sc);
533        Ok(Token::new(TokenKind::BytesLit(bytes), span))
534    }
535
    /// Lex a `"…"` string literal; the cursor is on the opening quote.
    ///
    /// Dispatches to `read_triple_quoted_string` for `"""`. Handles
    /// escape sequences and `{expr}` interpolation; returns
    /// `StringInterpLit` when at least one interpolation was seen,
    /// otherwise `StringLit`. Newlines are not allowed inside the
    /// single-quoted form.
    fn read_string(&mut self) -> Result<Token, LexError> {
        // Check for triple-quoted string
        if self.peek() == Some('"') && self.peek2() == Some('"') {
            return self.read_triple_quoted_string();
        }

        let (so, sl, sc) = (self.byte_offset, self.line, self.col);
        self.advance(); // opening quote
        // (is_expression, text) pairs; literal runs alternate with raw
        // interpolated-expression sources.
        let mut segments = Vec::new();
        let mut cur_segment = String::new();
        let mut is_interp = false;

        loop {
            match self.current() {
                None | Some('\n') => {
                    return Err(LexError::UnterminatedString {
                        line: self.base_line + sl - 1,
                        col: sc,
                    })
                }
                Some('\\') => {
                    self.advance();
                    self.process_escape(&mut cur_segment, sl, sc)?;
                }
                Some('{') if self.looks_like_interpolation_start() => {
                    // Start of interpolation
                    is_interp = true;
                    if !cur_segment.is_empty() {
                        segments.push((false, cur_segment.clone()));
                        cur_segment.clear();
                    }
                    self.advance(); // skip {
                                    // Read until }
                    let mut expr_str = String::new();
                    let mut brace_balance = 1;
                    while let Some(c) = self.current() {
                        if c == '}' {
                            brace_balance -= 1;
                            if brace_balance == 0 {
                                break;
                            }
                            expr_str.push(c);
                            self.advance();
                        } else if c == '{' {
                            brace_balance += 1;
                            expr_str.push(c);
                            self.advance();
                        } else if c == '"' {
                            // Handle strings inside interpolation to avoid incorrect brace matching
                            expr_str.push(c);
                            self.advance();
                            while let Some(ic) = self.current() {
                                expr_str.push(ic);
                                self.advance();
                                if ic == '"' && !expr_str.ends_with("\\\"") {
                                    break;
                                }
                            }
                        } else {
                            expr_str.push(c);
                            self.advance();
                        }
                    }
                    // Loop ended without reaching balance: ran off the input.
                    if brace_balance != 0 {
                        return Err(LexError::UnterminatedString {
                            line: self.base_line + sl - 1,
                            col: sc,
                        });
                    }
                    self.advance(); // skip }
                    segments.push((true, expr_str.trim().to_string()));
                }
                // A `{` that doesn't look like interpolation is literal text.
                Some('{') => {
                    cur_segment.push('{');
                    self.advance();
                }
                Some('"') => {
                    self.advance();
                    break;
                }
                Some(c) => {
                    cur_segment.push(c);
                    self.advance();
                }
            }
        }

        let span = self.span_from(so, sl, sc);
        if is_interp {
            if !cur_segment.is_empty() {
                segments.push((false, cur_segment));
            }
            Ok(Token::new(TokenKind::StringInterpLit(segments), span))
        } else {
            Ok(Token::new(TokenKind::StringLit(cur_segment), span))
        }
    }
633
634    fn read_number(&mut self) -> Result<Token, LexError> {
635        let (so, sl, sc) = (self.byte_offset, self.line, self.col);
636
637        // Check for 0x, 0b, 0o prefixes
638        if self.current() == Some('0') {
639            match self.peek() {
640                Some('x') | Some('X') => return self.read_hex_number(so, sl, sc),
641                Some('b') if matches!(self.peek2(), Some('0') | Some('1')) => {
642                    return self.read_bin_number(so, sl, sc)
643                }
644                Some('o') => return self.read_oct_number(so, sl, sc),
645                _ => {}
646            }
647        }
648
649        let mut ns = String::new();
650        let mut is_float = false;
651        while let Some(ch) = self.current() {
652            if ch.is_ascii_digit() {
653                ns.push(ch);
654                self.advance();
655            } else if ch == '.' && !is_float {
656                // Check for .. (range) and ... (spread) - don't consume the dot
657                if self.peek() == Some('.') {
658                    break;
659                }
660                if matches!(self.peek(), Some(d) if d.is_ascii_digit()) {
661                    is_float = true;
662                    ns.push(ch);
663                    self.advance();
664                } else {
665                    break;
666                }
667            } else if ch == '_' {
668                self.advance();
669            } else if (ch == 'e' || ch == 'E') && !is_float {
670                // Scientific notation
671                is_float = true;
672                ns.push(ch);
673                self.advance();
674                // Optional +/- sign
675                if matches!(self.current(), Some('+') | Some('-')) {
676                    ns.push(self.current().unwrap());
677                    self.advance();
678                }
679            } else {
680                break;
681            }
682        }
683        let span = self.span_from(so, sl, sc);
684        if is_float {
685            ns.parse::<f64>()
686                .map(|f| Token::new(TokenKind::FloatLit(f), span))
687                .map_err(|_| LexError::InvalidNumber {
688                    line: self.base_line + sl - 1,
689                    col: sc,
690                })
691        } else {
692            ns.parse::<i64>()
693                .map(|n| Token::new(TokenKind::IntLit(n), span))
694                .map_err(|_| LexError::InvalidNumber {
695                    line: self.base_line + sl - 1,
696                    col: sc,
697                })
698        }
699    }
700
701    fn read_hex_number(&mut self, so: usize, sl: usize, sc: usize) -> Result<Token, LexError> {
702        self.advance(); // skip 0
703        self.advance(); // skip x/X
704        let mut hex = String::new();
705        while let Some(ch) = self.current() {
706            if ch.is_ascii_hexdigit() {
707                hex.push(ch);
708                self.advance();
709            } else if ch == '_' {
710                self.advance();
711            } else {
712                break;
713            }
714        }
715        if hex.is_empty() {
716            return Err(LexError::InvalidNumber {
717                line: self.base_line + sl - 1,
718                col: sc,
719            });
720        }
721        let span = self.span_from(so, sl, sc);
722        i64::from_str_radix(&hex, 16)
723            .map(|n| Token::new(TokenKind::IntLit(n), span))
724            .map_err(|_| LexError::InvalidNumber {
725                line: self.base_line + sl - 1,
726                col: sc,
727            })
728    }
729
730    fn read_bin_number(&mut self, so: usize, sl: usize, sc: usize) -> Result<Token, LexError> {
731        self.advance(); // skip 0
732        self.advance(); // skip b
733        let mut bin = String::new();
734        while let Some(ch) = self.current() {
735            if ch == '0' || ch == '1' {
736                bin.push(ch);
737                self.advance();
738            } else if ch == '_' {
739                self.advance();
740            } else {
741                break;
742            }
743        }
744        if bin.is_empty() {
745            return Err(LexError::InvalidNumber {
746                line: self.base_line + sl - 1,
747                col: sc,
748            });
749        }
750        let span = self.span_from(so, sl, sc);
751        i64::from_str_radix(&bin, 2)
752            .map(|n| Token::new(TokenKind::IntLit(n), span))
753            .map_err(|_| LexError::InvalidNumber {
754                line: self.base_line + sl - 1,
755                col: sc,
756            })
757    }
758
759    fn read_oct_number(&mut self, so: usize, sl: usize, sc: usize) -> Result<Token, LexError> {
760        self.advance(); // skip 0
761        self.advance(); // skip o
762        let mut oct = String::new();
763        while let Some(ch) = self.current() {
764            if ('0'..='7').contains(&ch) {
765                oct.push(ch);
766                self.advance();
767            } else if ch == '_' {
768                self.advance();
769            } else {
770                break;
771            }
772        }
773        if oct.is_empty() {
774            return Err(LexError::InvalidNumber {
775                line: self.base_line + sl - 1,
776                col: sc,
777            });
778        }
779        let span = self.span_from(so, sl, sc);
780        i64::from_str_radix(&oct, 8)
781            .map(|n| Token::new(TokenKind::IntLit(n), span))
782            .map_err(|_| LexError::InvalidNumber {
783                line: self.base_line + sl - 1,
784                col: sc,
785            })
786    }
787
    /// Lex an identifier (alphanumerics and `_`, via `is_alphanumeric`)
    /// and map reserved words to their keyword tokens; any other text
    /// becomes `Ident`. Note `null` (the literal) and `Null` (the type)
    /// are distinct.
    fn read_ident(&mut self) -> Token {
        let (so, sl, sc) = (self.byte_offset, self.line, self.col);
        let mut id = String::new();
        while let Some(ch) = self.current() {
            if ch.is_alphanumeric() || ch == '_' {
                id.push(ch);
                self.advance();
            } else {
                break;
            }
        }
        let span = self.span_from(so, sl, sc);
        // Keyword table: exact-match the collected word.
        let kind = match id.as_str() {
            "record" => TokenKind::Record,
            "enum" => TokenKind::Enum,
            "cell" => TokenKind::Cell,
            "let" => TokenKind::Let,
            "if" => TokenKind::If,
            "else" => TokenKind::Else,
            "for" => TokenKind::For,
            "in" => TokenKind::In,
            "match" => TokenKind::Match,
            "return" => TokenKind::Return,
            "halt" => TokenKind::Halt,
            "end" => TokenKind::End,
            "use" => TokenKind::Use,
            "tool" => TokenKind::Tool,
            "as" => TokenKind::As,
            "grant" => TokenKind::Grant,
            "expect" => TokenKind::Expect,
            "schema" => TokenKind::Schema,
            "role" => TokenKind::Role,
            "where" => TokenKind::Where,
            "and" => TokenKind::And,
            "or" => TokenKind::Or,
            "not" => TokenKind::Not,
            "null" => TokenKind::NullLit,
            "Null" => TokenKind::Null,
            "result" => TokenKind::Result,
            "ok" => TokenKind::Ok_,
            "err" => TokenKind::Err_,
            "list" => TokenKind::List,
            "map" => TokenKind::Map,
            "true" => TokenKind::BoolLit(true),
            "false" => TokenKind::BoolLit(false),
            // New keywords
            "while" => TokenKind::While,
            "loop" => TokenKind::Loop,
            "break" => TokenKind::Break,
            "continue" => TokenKind::Continue,
            "mut" => TokenKind::Mut,
            "const" => TokenKind::Const,
            "pub" => TokenKind::Pub,
            "import" => TokenKind::Import,
            "from" => TokenKind::From,
            "async" => TokenKind::Async,
            "await" => TokenKind::Await,
            "parallel" => TokenKind::Parallel,
            "fn" => TokenKind::Fn,
            "trait" => TokenKind::Trait,
            "impl" => TokenKind::Impl,
            "type" => TokenKind::Type,
            "set" => TokenKind::Set,
            "tuple" => TokenKind::Tuple,
            "emit" => TokenKind::Emit,
            "yield" => TokenKind::Yield,
            "mod" => TokenKind::Mod,
            "self" => TokenKind::SelfKw,
            "with" => TokenKind::With,
            "try" => TokenKind::Try,
            "union" => TokenKind::Union,
            "step" => TokenKind::Step,
            "comptime" => TokenKind::Comptime,
            "macro" => TokenKind::Macro,
            "extern" => TokenKind::Extern,
            "then" => TokenKind::Then,
            "when" => TokenKind::When,
            "is" => TokenKind::Is,
            "defer" => TokenKind::Defer,
            // Type keywords
            "bool" => TokenKind::Bool,
            "int" => TokenKind::Int_,
            "float" => TokenKind::Float_,
            "string" => TokenKind::String_,
            "bytes" => TokenKind::Bytes,
            "json" => TokenKind::Json,
            _ => TokenKind::Ident(id),
        };
        Token::new(kind, span)
    }
878
879    fn single(&mut self, kind: TokenKind) -> Token {
880        let span = self.span_here();
881        self.advance();
882        Token::new(kind, span)
883    }
884
885    pub fn tokenize(&mut self) -> Result<Vec<Token>, LexError> {
886        let mut tokens = Vec::new();
887        while self.pos < self.source.len() {
888            if self.at_line_start {
889                self.at_line_start = false;
890                self.handle_indentation()?;
891                tokens.append(&mut self.pending);
892            }
893            let ch = match self.current() {
894                Some(c) => c,
895                None => break,
896            };
897            match ch {
898                '\n' => {
899                    let span = self.span_here();
900                    self.advance();
901                    if !matches!(
902                        tokens.last().map(|t| &t.kind),
903                        Some(TokenKind::Newline) | Some(TokenKind::Indent) | None
904                    ) {
905                        tokens.push(Token::new(TokenKind::Newline, span));
906                    }
907                }
908                ' ' | '\t' | '\r' => {
909                    while matches!(self.current(), Some(' ' | '\t' | '\r')) {
910                        self.advance();
911                    }
912                }
913                '#' => {
914                    while matches!(self.current(), Some(c) if c != '\n') {
915                        self.advance();
916                    }
917                }
918                '"' => tokens.push(self.read_string()?),
919                '0'..='9' => tokens.push(self.read_number()?),
920                'r' if self.peek() == Some('"') => tokens.push(self.read_raw_string()?),
921                'b' if self.peek() == Some('"') => tokens.push(self.read_bytes_literal()?),
922                'a'..='z' | 'A'..='Z' | '_' => tokens.push(self.read_ident()),
923                '+' => {
924                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
925                    self.advance();
926                    match self.current() {
927                        Some('=') => {
928                            self.advance();
929                            tokens.push(Token::new(
930                                TokenKind::PlusAssign,
931                                self.span_from(so, sl, sc),
932                            ));
933                        }
934                        Some('+') => {
935                            self.advance();
936                            tokens
937                                .push(Token::new(TokenKind::PlusPlus, self.span_from(so, sl, sc)));
938                        }
939                        _ => {
940                            tokens.push(Token::new(TokenKind::Plus, self.span_from(so, sl, sc)));
941                        }
942                    }
943                }
944                '-' => {
945                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
946                    self.advance();
947                    match self.current() {
948                        Some('>') => {
949                            self.advance();
950                            tokens.push(Token::new(TokenKind::Arrow, self.span_from(so, sl, sc)));
951                        }
952                        Some('=') => {
953                            self.advance();
954                            tokens.push(Token::new(
955                                TokenKind::MinusAssign,
956                                self.span_from(so, sl, sc),
957                            ));
958                        }
959                        _ => {
960                            tokens.push(Token::new(TokenKind::Minus, self.span_from(so, sl, sc)));
961                        }
962                    }
963                }
964                '*' => {
965                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
966                    self.advance();
967                    match self.current() {
968                        Some('*') => {
969                            self.advance();
970                            if self.current() == Some('=') {
971                                self.advance();
972                                tokens.push(Token::new(
973                                    TokenKind::StarStarAssign,
974                                    self.span_from(so, sl, sc),
975                                ));
976                            } else {
977                                tokens.push(Token::new(
978                                    TokenKind::StarStar,
979                                    self.span_from(so, sl, sc),
980                                ));
981                            }
982                        }
983                        Some('=') => {
984                            self.advance();
985                            tokens.push(Token::new(
986                                TokenKind::StarAssign,
987                                self.span_from(so, sl, sc),
988                            ));
989                        }
990                        _ => {
991                            tokens.push(Token::new(TokenKind::Star, self.span_from(so, sl, sc)));
992                        }
993                    }
994                }
995                '/' => {
996                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
997                    self.advance();
998                    match self.current() {
999                        Some('/') => {
1000                            self.advance();
1001                            if self.current() == Some('=') {
1002                                self.advance();
1003                                tokens.push(Token::new(
1004                                    TokenKind::FloorDivAssign,
1005                                    self.span_from(so, sl, sc),
1006                                ));
1007                            } else {
1008                                tokens.push(Token::new(
1009                                    TokenKind::FloorDiv,
1010                                    self.span_from(so, sl, sc),
1011                                ));
1012                            }
1013                        }
1014                        Some('=') => {
1015                            self.advance();
1016                            tokens.push(Token::new(
1017                                TokenKind::SlashAssign,
1018                                self.span_from(so, sl, sc),
1019                            ));
1020                        }
1021                        _ => {
1022                            tokens.push(Token::new(TokenKind::Slash, self.span_from(so, sl, sc)));
1023                        }
1024                    }
1025                }
1026                '%' => {
1027                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1028                    self.advance();
1029                    if self.current() == Some('=') {
1030                        self.advance();
1031                        tokens.push(Token::new(
1032                            TokenKind::PercentAssign,
1033                            self.span_from(so, sl, sc),
1034                        ));
1035                    } else {
1036                        tokens.push(Token::new(TokenKind::Percent, self.span_from(so, sl, sc)));
1037                    }
1038                }
1039                '=' => {
1040                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1041                    self.advance();
1042                    match self.current() {
1043                        Some('=') => {
1044                            self.advance();
1045                            tokens.push(Token::new(TokenKind::Eq, self.span_from(so, sl, sc)));
1046                        }
1047                        Some('>') => {
1048                            self.advance();
1049                            tokens
1050                                .push(Token::new(TokenKind::FatArrow, self.span_from(so, sl, sc)));
1051                        }
1052                        _ => {
1053                            tokens.push(Token::new(TokenKind::Assign, self.span_from(so, sl, sc)));
1054                        }
1055                    }
1056                }
1057                '!' => {
1058                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1059                    self.advance();
1060                    match self.current() {
1061                        Some('=') => {
1062                            self.advance();
1063                            tokens.push(Token::new(TokenKind::NotEq, self.span_from(so, sl, sc)));
1064                        }
1065                        _ => {
1066                            tokens.push(Token::new(TokenKind::Bang, self.span_from(so, sl, sc)));
1067                        }
1068                    }
1069                }
1070                '?' => {
1071                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1072                    self.advance();
1073                    match self.current() {
1074                        Some('?') => {
1075                            self.advance();
1076                            tokens.push(Token::new(
1077                                TokenKind::QuestionQuestion,
1078                                self.span_from(so, sl, sc),
1079                            ));
1080                        }
1081                        Some('.') => {
1082                            self.advance();
1083                            tokens.push(Token::new(
1084                                TokenKind::QuestionDot,
1085                                self.span_from(so, sl, sc),
1086                            ));
1087                        }
1088                        Some('[') => {
1089                            self.advance();
1090                            tokens.push(Token::new(
1091                                TokenKind::QuestionBracket,
1092                                self.span_from(so, sl, sc),
1093                            ));
1094                        }
1095                        _ => {
1096                            tokens
1097                                .push(Token::new(TokenKind::Question, self.span_from(so, sl, sc)));
1098                        }
1099                    }
1100                }
1101                '<' => {
1102                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1103                    self.advance();
1104                    match self.current() {
1105                        Some('=') => {
1106                            self.advance();
1107                            tokens.push(Token::new(TokenKind::LtEq, self.span_from(so, sl, sc)));
1108                        }
1109                        Some('<') => {
1110                            self.advance();
1111                            tokens
1112                                .push(Token::new(TokenKind::LeftShift, self.span_from(so, sl, sc)));
1113                        }
1114                        _ => {
1115                            tokens.push(Token::new(TokenKind::Lt, self.span_from(so, sl, sc)));
1116                        }
1117                    }
1118                }
1119                '>' => {
1120                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1121                    self.advance();
1122                    match self.current() {
1123                        Some('=') => {
1124                            self.advance();
1125                            tokens.push(Token::new(TokenKind::GtEq, self.span_from(so, sl, sc)));
1126                        }
1127                        Some('>') => {
1128                            self.advance();
1129                            tokens.push(Token::new(
1130                                TokenKind::RightShift,
1131                                self.span_from(so, sl, sc),
1132                            ));
1133                        }
1134                        _ => {
1135                            tokens.push(Token::new(TokenKind::Gt, self.span_from(so, sl, sc)));
1136                        }
1137                    }
1138                }
1139                '.' => {
1140                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1141                    self.advance();
1142                    if self.current() == Some('.') {
1143                        self.advance();
1144                        if self.current() == Some('.') {
1145                            self.advance();
1146                            tokens
1147                                .push(Token::new(TokenKind::DotDotDot, self.span_from(so, sl, sc)));
1148                        } else if self.current() == Some('=') {
1149                            self.advance();
1150                            tokens
1151                                .push(Token::new(TokenKind::DotDotEq, self.span_from(so, sl, sc)));
1152                        } else {
1153                            tokens.push(Token::new(TokenKind::DotDot, self.span_from(so, sl, sc)));
1154                        }
1155                    } else {
1156                        tokens.push(Token::new(TokenKind::Dot, self.span_from(so, sl, sc)));
1157                    }
1158                }
1159                ',' => tokens.push(self.single(TokenKind::Comma)),
1160                ':' => tokens.push(self.single(TokenKind::Colon)),
1161                ';' => tokens.push(self.single(TokenKind::Semicolon)),
1162                '|' => {
1163                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1164                    self.advance();
1165                    match self.current() {
1166                        Some('>') => {
1167                            self.advance();
1168                            tokens.push(Token::new(
1169                                TokenKind::PipeForward,
1170                                self.span_from(so, sl, sc),
1171                            ));
1172                        }
1173                        Some('=') => {
1174                            self.advance();
1175                            tokens.push(Token::new(
1176                                TokenKind::PipeAssign,
1177                                self.span_from(so, sl, sc),
1178                            ));
1179                        }
1180                        _ => {
1181                            tokens.push(Token::new(TokenKind::Pipe, self.span_from(so, sl, sc)));
1182                        }
1183                    }
1184                }
1185                '@' => tokens.push(self.single(TokenKind::At)),
1186                '&' => {
1187                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1188                    self.advance();
1189                    if self.current() == Some('=') {
1190                        self.advance();
1191                        tokens.push(Token::new(TokenKind::AmpAssign, self.span_from(so, sl, sc)));
1192                    } else {
1193                        tokens.push(Token::new(TokenKind::Ampersand, self.span_from(so, sl, sc)));
1194                    }
1195                }
1196                '~' => {
1197                    let so = self.byte_offset;
1198                    let sl = self.line;
1199                    let sc = self.col;
1200                    self.advance();
1201                    match self.current() {
1202                        Some('>') => {
1203                            self.advance();
1204                            tokens.push(Token::new(
1205                                TokenKind::TildeArrow,
1206                                self.span_from(so, sl, sc),
1207                            ));
1208                        }
1209                        _ => {
1210                            tokens.push(Token::new(TokenKind::Tilde, self.span_from(so, sl, sc)));
1211                        }
1212                    }
1213                }
1214                '^' => {
1215                    let (so, sl, sc) = (self.byte_offset, self.line, self.col);
1216                    self.advance();
1217                    if self.current() == Some('=') {
1218                        self.advance();
1219                        tokens.push(Token::new(
1220                            TokenKind::CaretAssign,
1221                            self.span_from(so, sl, sc),
1222                        ));
1223                    } else {
1224                        tokens.push(Token::new(TokenKind::Caret, self.span_from(so, sl, sc)));
1225                    }
1226                }
1227                '(' => tokens.push(self.single(TokenKind::LParen)),
1228                ')' => tokens.push(self.single(TokenKind::RParen)),
1229                '[' => tokens.push(self.single(TokenKind::LBracket)),
1230                ']' => tokens.push(self.single(TokenKind::RBracket)),
1231                '{' => tokens.push(self.single(TokenKind::LBrace)),
1232                '}' => tokens.push(self.single(TokenKind::RBrace)),
1233                '\\' if self.peek() == Some('\n') => {
1234                    // Line continuation
1235                    self.advance(); // skip backslash
1236                    self.advance(); // skip newline
1237                                    // Don't emit newline, don't set at_line_start
1238                    self.at_line_start = false;
1239                    // Skip leading whitespace on the continuation line
1240                    while matches!(self.current(), Some(' ' | '\t')) {
1241                        self.advance();
1242                    }
1243                }
1244                c => tokens.push(self.single(TokenKind::Symbol(c))),
1245            }
1246        }
1247        while self.indent_stack.len() > 1 {
1248            self.indent_stack.pop();
1249            tokens.push(Token::new(TokenKind::Dedent, self.span_here()));
1250        }
1251        tokens.push(Token::new(TokenKind::Eof, self.span_here()));
1252        Ok(tokens)
1253    }
1254}
1255
#[cfg(test)]
mod tests {
    use super::*;

    /// Lex `src` from line 1 / byte offset 0, panicking on lex errors.
    fn lex(src: &str) -> Vec<Token> {
        Lexer::new(src, 1, 0).tokenize().unwrap()
    }

    /// Convenience: just the token kinds, for containment checks.
    fn kinds(src: &str) -> Vec<TokenKind> {
        lex(src).into_iter().map(|t| t.kind).collect()
    }

    #[test]
    fn test_lex_cell() {
        let tokens = lex("cell main() -> Int\n  return 42\nend");
        assert!(matches!(&tokens[0].kind, TokenKind::Cell));
        assert!(matches!(&tokens[1].kind, TokenKind::Ident(s) if s == "main"));
    }

    #[test]
    fn test_lex_operators() {
        let tokens = lex("a + b == c");
        assert!(matches!(&tokens[1].kind, TokenKind::Plus));
        assert!(matches!(&tokens[3].kind, TokenKind::Eq));
    }

    #[test]
    fn test_lex_string() {
        let tokens = lex(r#""hello""#);
        assert!(matches!(&tokens[0].kind, TokenKind::StringLit(s) if s == "hello"));
    }

    #[test]
    fn test_lex_indent() {
        let ks = kinds("if x\n  return 1\nend");
        assert!(ks.contains(&TokenKind::Indent));
        assert!(ks.contains(&TokenKind::Dedent));
    }

    #[test]
    fn test_lex_hex_number() {
        assert!(matches!(&lex("0xFF")[0].kind, TokenKind::IntLit(255)));
    }

    #[test]
    fn test_lex_bin_number() {
        assert!(matches!(&lex("0b1010")[0].kind, TokenKind::IntLit(10)));
    }

    #[test]
    fn test_lex_oct_number() {
        assert!(matches!(&lex("0o777")[0].kind, TokenKind::IntLit(511)));
    }

    #[test]
    fn test_lex_scientific() {
        assert!(matches!(&lex("1e10")[0].kind, TokenKind::FloatLit(f) if *f == 1e10));
    }

    #[test]
    fn test_lex_compound_assign() {
        let tokens = lex("+= -= *= /=");
        assert!(matches!(&tokens[0].kind, TokenKind::PlusAssign));
        assert!(matches!(&tokens[1].kind, TokenKind::MinusAssign));
        assert!(matches!(&tokens[2].kind, TokenKind::StarAssign));
        assert!(matches!(&tokens[3].kind, TokenKind::SlashAssign));
    }

    #[test]
    fn test_lex_new_operators() {
        let tokens = lex("** .. ..= |> >> ?? ?. ! ? ... => ++ & ~ ^");
        let expected = [
            TokenKind::StarStar,
            TokenKind::DotDot,
            TokenKind::DotDotEq,
            TokenKind::PipeForward,
            TokenKind::RightShift,
            TokenKind::QuestionQuestion,
            TokenKind::QuestionDot,
            TokenKind::Bang,
            TokenKind::Question,
            TokenKind::DotDotDot,
            TokenKind::FatArrow,
            TokenKind::PlusPlus,
            TokenKind::Ampersand,
            TokenKind::Tilde,
            TokenKind::Caret,
        ];
        for (i, want) in expected.iter().enumerate() {
            assert!(tokens[i].kind == *want);
        }
    }

    #[test]
    fn test_lex_new_keywords() {
        let tokens = lex("while loop break continue mut const pub import from async await parallel fn trait impl type set tuple emit yield mod self with try union step comptime macro extern then when");
        let expected = [
            TokenKind::While,
            TokenKind::Loop,
            TokenKind::Break,
            TokenKind::Continue,
            TokenKind::Mut,
            TokenKind::Const,
            TokenKind::Pub,
            TokenKind::Import,
            TokenKind::From,
            TokenKind::Async,
            TokenKind::Await,
            TokenKind::Parallel,
            TokenKind::Fn,
            TokenKind::Trait,
            TokenKind::Impl,
            TokenKind::Type,
            TokenKind::Set,
            TokenKind::Tuple,
            TokenKind::Emit,
            TokenKind::Yield,
            TokenKind::Mod,
            TokenKind::SelfKw,
            TokenKind::With,
            TokenKind::Try,
            TokenKind::Union,
            TokenKind::Step,
            TokenKind::Comptime,
            TokenKind::Macro,
            TokenKind::Extern,
            TokenKind::Then,
            TokenKind::When,
        ];
        for (i, want) in expected.iter().enumerate() {
            assert!(tokens[i].kind == *want);
        }
    }

    #[test]
    fn test_lex_raw_string() {
        let tokens = lex(r#"r"no \n here""#);
        assert!(matches!(&tokens[0].kind, TokenKind::RawStringLit(s) if s == r"no \n here"));
    }

    #[test]
    fn test_lex_bytes_literal() {
        let tokens = lex(r#"b"48656C6C6F""#);
        assert!(
            matches!(&tokens[0].kind, TokenKind::BytesLit(b) if b == &[0x48, 0x65, 0x6C, 0x6C, 0x6F])
        );
    }

    #[test]
    fn test_lex_fat_arrow() {
        assert!(matches!(&lex("=>")[0].kind, TokenKind::FatArrow));
    }

    #[test]
    fn test_lex_line_continuation() {
        let tokens = lex("a +\\\n  b");
        // Should NOT have a Newline between + and b
        assert!(tokens.iter().all(|t| t.kind != TokenKind::Newline));
        assert!(matches!(&tokens[0].kind, TokenKind::Ident(s) if s == "a"));
        assert!(matches!(&tokens[1].kind, TokenKind::Plus));
        assert!(matches!(&tokens[2].kind, TokenKind::Ident(s) if s == "b"));
    }

    #[test]
    fn test_lex_null_literal() {
        assert!(matches!(&lex("null")[0].kind, TokenKind::NullLit));
    }

    #[test]
    fn test_lex_unicode_escape() {
        let tokens = lex(r#""\u{0041}""#);
        assert!(matches!(&tokens[0].kind, TokenKind::StringLit(s) if s == "A"));
    }

    #[test]
    fn test_lex_hex_byte_escape() {
        let tokens = lex(r#""\x41""#);
        assert!(matches!(&tokens[0].kind, TokenKind::StringLit(s) if s == "A"));
    }
}