Skip to main content

cel_core/parser/
lexer.rs

1//! CEL lexer using logos.
2
3use crate::types::Span;
4use logos::Logos;
5
6/// A token with its source span.
7pub type SpannedToken = (Token, Span);
8
9/// Lexer error with span information.
10#[derive(Debug, Clone, PartialEq)]
11pub struct LexError {
12    pub message: String,
13    pub span: Span,
14}
15
/// CEL tokens.
///
/// Whitespace and `//` line comments are consumed by the two `#[logos(skip)]`
/// patterns below and never appear in the token stream. Pattern/priority
/// ordering between the numeric variants is load-bearing — see the comments
/// on each group.
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\n\r\f]+")]
#[logos(skip r"//[^\n]*")]
pub enum Token {
    // === Numeric Literals ===
    // Order matters: more specific patterns first

    // Hex unsigned: 0x1Fu, 0X1FU
    #[regex(r"0[xX][0-9a-fA-F]+[uU]", lex_hex_uint)]
    // Decimal unsigned: 123u, 123U
    #[regex(r"[0-9]+[uU]", lex_decimal_uint, priority = 4)]
    UInt(u64),

    // Hex int: 0x1F, 0X1F
    #[regex(r"0[xX][0-9a-fA-F]+", lex_hex_int, priority = 3)]
    // Decimal int: 123 (lowest priority for numbers)
    #[regex(r"[0-9]+", lex_decimal_int, priority = 1)]
    Int(i64),

    // Integer overflow - specifically handle the i64::MIN absolute value case
    // 9223372036854775808 = |i64::MIN| = i64::MAX + 1
    // This allows "-9223372036854775808" to parse correctly
    // (priority 2 beats the plain decimal regex at equal match length;
    // any other out-of-range decimal still fails inside lex_decimal_int).
    #[token("9223372036854775808", |_| "9223372036854775808".to_string(), priority = 2)]
    IntOverflow(String),

    // Float with decimal point and optional exponent: 1.5, 1.5e10
    #[regex(r"[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?", lex_float, priority = 5)]
    // Float with exponent only: 1e10, 1E-5
    #[regex(r"[0-9]+[eE][+-]?[0-9]+", lex_float, priority = 2)]
    // Float starting with decimal point: .5, .99, .5e3
    #[regex(r"\.[0-9]+([eE][+-]?[0-9]+)?", lex_float, priority = 6)]
    Float(f64),

    // === String Literals ===
    // Each regex only matches the opening quote(s); the callback scans the
    // remainder for the body and terminator and bumps the lexer past them.
    // Triple-quoted strings (must come before single/double to match first)
    #[regex(r#"""""#, lex_triple_double_string)]
    #[regex(r"'''", lex_triple_single_string)]
    // Raw triple-quoted strings (must come before raw single/double to match first)
    #[regex(r#"[rR]""""#, lex_raw_triple_double_string)]
    #[regex(r"[rR]'''", lex_raw_triple_single_string)]
    // Raw strings
    #[regex(r#"[rR]""#, lex_raw_double_string)]
    #[regex(r"[rR]'", lex_raw_single_string)]
    // Regular strings
    #[regex(r#"""#, lex_double_string)]
    #[regex(r"'", lex_single_string)]
    String(String),

    // === Bytes Literals ===
    // Both prefix orders (bR / Rb, etc.) are accepted.
    // Raw bytes triple-quoted (must come first)
    #[regex(r#"[bB][rR]""""#, lex_raw_bytes_triple_double)]
    #[regex(r"[bB][rR]'''", lex_raw_bytes_triple_single)]
    #[regex(r#"[rR][bB]""""#, lex_raw_bytes_triple_double)]
    #[regex(r"[rR][bB]'''", lex_raw_bytes_triple_single)]
    // Triple-quoted bytes (must come before single/double)
    #[regex(r#"[bB]""""#, lex_bytes_triple_double)]
    #[regex(r"[bB]'''", lex_bytes_triple_single)]
    // Raw bytes single/double-quoted
    #[regex(r#"[bB][rR]""#, lex_raw_bytes_double)]
    #[regex(r"[bB][rR]'", lex_raw_bytes_single)]
    #[regex(r#"[rR][bB]""#, lex_raw_bytes_double)]
    #[regex(r"[rR][bB]'", lex_raw_bytes_single)]
    // Regular bytes
    #[regex(r#"[bB]""#, lex_bytes_double)]
    #[regex(r"[bB]'", lex_bytes_single)]
    Bytes(Vec<u8>),

    // === Keywords ===
    // #[token] patterns win over the identifier regex (which has priority 0).
    #[token("true")]
    True,
    #[token("false")]
    False,
    #[token("null")]
    Null,
    #[token("in")]
    In,

    // === Reserved Words ===
    // Lexed into their own variant so the parser can report a dedicated
    // "reserved word" error instead of a generic unexpected-identifier.
    #[token("as", |_| "as".to_string())]
    #[token("break", |_| "break".to_string())]
    #[token("const", |_| "const".to_string())]
    #[token("continue", |_| "continue".to_string())]
    #[token("else", |_| "else".to_string())]
    #[token("for", |_| "for".to_string())]
    #[token("function", |_| "function".to_string())]
    #[token("if", |_| "if".to_string())]
    #[token("import", |_| "import".to_string())]
    #[token("let", |_| "let".to_string())]
    #[token("loop", |_| "loop".to_string())]
    #[token("package", |_| "package".to_string())]
    #[token("namespace", |_| "namespace".to_string())]
    #[token("return", |_| "return".to_string())]
    #[token("var", |_| "var".to_string())]
    #[token("void", |_| "void".to_string())]
    #[token("while", |_| "while".to_string())]
    Reserved(String),

    // === Identifier ===
    // priority = 0 so keyword/reserved #[token]s take precedence.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string(), priority = 0)]
    // Backtick-quoted identifier; the backticks are stripped by the callback.
    #[regex(r"`[^`]+`", lex_backtick_ident)]
    Ident(String),

    // === Operators (multi-char first) ===
    #[token("==")]
    EqEq,
    #[token("!=")]
    Ne,
    #[token("<=")]
    Le,
    #[token(">=")]
    Ge,
    #[token("&&")]
    And,
    #[token("||")]
    Or,

    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("!")]
    Not,
    #[token("?")]
    Question,
    #[token(":")]
    Colon,

    // === Delimiters ===
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token(".")]
    Dot,
    #[token(",")]
    Comma,
}
172
173impl std::fmt::Display for Token {
174    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
175        match self {
176            Token::Int(n) => write!(f, "{}", n),
177            Token::IntOverflow(s) => write!(f, "{}", s),
178            Token::UInt(n) => write!(f, "{}u", n),
179            Token::Float(n) => write!(f, "{}", n),
180            Token::String(s) => write!(f, "\"{}\"", s),
181            Token::Bytes(b) => write!(f, "b\"{}\"", String::from_utf8_lossy(b)),
182            Token::True => write!(f, "true"),
183            Token::False => write!(f, "false"),
184            Token::Null => write!(f, "null"),
185            Token::In => write!(f, "in"),
186            Token::Reserved(s) => write!(f, "{}", s),
187            Token::Ident(s) => write!(f, "{}", s),
188            Token::Plus => write!(f, "+"),
189            Token::Minus => write!(f, "-"),
190            Token::Star => write!(f, "*"),
191            Token::Slash => write!(f, "/"),
192            Token::Percent => write!(f, "%"),
193            Token::EqEq => write!(f, "=="),
194            Token::Ne => write!(f, "!="),
195            Token::Lt => write!(f, "<"),
196            Token::Le => write!(f, "<="),
197            Token::Gt => write!(f, ">"),
198            Token::Ge => write!(f, ">="),
199            Token::And => write!(f, "&&"),
200            Token::Or => write!(f, "||"),
201            Token::Not => write!(f, "!"),
202            Token::Question => write!(f, "?"),
203            Token::Colon => write!(f, ":"),
204            Token::LParen => write!(f, "("),
205            Token::RParen => write!(f, ")"),
206            Token::LBracket => write!(f, "["),
207            Token::RBracket => write!(f, "]"),
208            Token::LBrace => write!(f, "{{"),
209            Token::RBrace => write!(f, "}}"),
210            Token::Dot => write!(f, "."),
211            Token::Comma => write!(f, ","),
212        }
213    }
214}
215
216// === Lexer Callbacks for Numbers ===
217
218fn lex_decimal_int(lex: &mut logos::Lexer<Token>) -> Option<i64> {
219    lex.slice().parse().ok()
220}
221
222fn lex_decimal_uint(lex: &mut logos::Lexer<Token>) -> Option<u64> {
223    let s = lex.slice();
224    s[..s.len() - 1].parse().ok() // Remove trailing u/U
225}
226
227fn lex_hex_int(lex: &mut logos::Lexer<Token>) -> Option<i64> {
228    let s = lex.slice();
229    i64::from_str_radix(&s[2..], 16).ok() // Skip 0x
230}
231
232fn lex_hex_uint(lex: &mut logos::Lexer<Token>) -> Option<u64> {
233    let s = lex.slice();
234    u64::from_str_radix(&s[2..s.len() - 1], 16).ok() // Skip 0x, remove u
235}
236
237fn lex_float(lex: &mut logos::Lexer<Token>) -> Option<f64> {
238    lex.slice().parse().ok()
239}
240
241fn lex_backtick_ident(lex: &mut logos::Lexer<Token>) -> String {
242    let s = lex.slice();
243    s[1..s.len() - 1].to_string() // Strip backticks
244}
245
246// === Lexer Callbacks for Strings ===
247
/// Callback for a `"`-opened string: scan body + closing `"`.
fn lex_double_string(lex: &mut logos::Lexer<Token>) -> Option<String> {
    lex_quoted_string(lex, '"')
}

/// Callback for a `'`-opened string: scan body + closing `'`.
fn lex_single_string(lex: &mut logos::Lexer<Token>) -> Option<String> {
    lex_quoted_string(lex, '\'')
}
255
256fn lex_quoted_string(lex: &mut logos::Lexer<Token>, quote: char) -> Option<String> {
257    let remainder = lex.remainder();
258    let mut chars = remainder.chars().peekable();
259    let mut result = std::string::String::new();
260    let mut consumed = 0;
261
262    while let Some(c) = chars.next() {
263        consumed += c.len_utf8();
264        if c == quote {
265            lex.bump(consumed);
266            return Some(result);
267        } else if c == '\\' {
268            let escape_char = chars.next()?;
269            consumed += escape_char.len_utf8();
270            match escape_char {
271                '\\' => result.push('\\'),
272                '/' => result.push('/'),
273                '"' => result.push('"'),
274                '\'' => result.push('\''),
275                '`' => result.push('`'),
276                'a' => result.push('\x07'),
277                'b' => result.push('\x08'),
278                'f' => result.push('\x0C'),
279                'n' => result.push('\n'),
280                'r' => result.push('\r'),
281                't' => result.push('\t'),
282                'v' => result.push('\x0B'),
283                '?' => result.push('?'),
284                'x' | 'X' => {
285                    // \xHH or \XHH - 2 hex digits
286                    let h1 = chars.next()?;
287                    let h2 = chars.next()?;
288                    consumed += h1.len_utf8() + h2.len_utf8();
289                    let hex = format!("{}{}", h1, h2);
290                    let val = u8::from_str_radix(&hex, 16).ok()?;
291                    result.push(val as char);
292                }
293                'u' => {
294                    // \uXXXX - 4 hex digits
295                    let hex: String = chars.by_ref().take(4).collect();
296                    consumed += hex.len();
297                    if hex.len() != 4 {
298                        return None;
299                    }
300                    let val = u32::from_str_radix(&hex, 16).ok()?;
301                    result.push(char::from_u32(val)?);
302                }
303                'U' => {
304                    // \UXXXXXXXX - 8 hex digits
305                    let hex: String = chars.by_ref().take(8).collect();
306                    consumed += hex.len();
307                    if hex.len() != 8 {
308                        return None;
309                    }
310                    let val = u32::from_str_radix(&hex, 16).ok()?;
311                    result.push(char::from_u32(val)?);
312                }
313                c @ '0'..='3' => {
314                    // \DDD - octal (first digit 0-3, then 2 more digits)
315                    let d2 = chars.next()?;
316                    let d3 = chars.next()?;
317                    consumed += d2.len_utf8() + d3.len_utf8();
318                    if !matches!(d2, '0'..='7') || !matches!(d3, '0'..='7') {
319                        return None;
320                    }
321                    let octal = format!("{}{}{}", c, d2, d3);
322                    let val = u8::from_str_radix(&octal, 8).ok()?;
323                    result.push(val as char);
324                }
325                _ => return None, // Invalid escape
326            }
327        } else if c == '\n' {
328            // Newline not allowed in regular strings
329            return None;
330        } else {
331            result.push(c);
332        }
333    }
334
335    None // Unclosed string
336}
337
/// Callback for an `r"`-opened raw string.
fn lex_raw_double_string(lex: &mut logos::Lexer<Token>) -> Option<String> {
    lex_raw_string(lex, '"')
}

/// Callback for an `r'`-opened raw string.
fn lex_raw_single_string(lex: &mut logos::Lexer<Token>) -> Option<String> {
    lex_raw_string(lex, '\'')
}
345
346fn lex_raw_string(lex: &mut logos::Lexer<Token>, quote: char) -> Option<String> {
347    let remainder = lex.remainder();
348    let mut result = std::string::String::new();
349    let mut consumed = 0;
350
351    for c in remainder.chars() {
352        consumed += c.len_utf8();
353        if c == quote {
354            lex.bump(consumed);
355            return Some(result);
356        }
357        result.push(c);
358    }
359
360    None // Unclosed string
361}
362
363fn lex_triple_double_string(lex: &mut logos::Lexer<Token>) -> Option<String> {
364    lex_triple_string(lex, "\"\"\"")
365}
366
367fn lex_triple_single_string(lex: &mut logos::Lexer<Token>) -> Option<String> {
368    lex_triple_string(lex, "'''")
369}
370
371fn lex_triple_string(lex: &mut logos::Lexer<Token>, end_quote: &str) -> Option<String> {
372    let remainder = lex.remainder();
373
374    if let Some(end_pos) = remainder.find(end_quote) {
375        let content = &remainder[..end_pos];
376        lex.bump(end_pos + end_quote.len());
377        Some(content.to_string())
378    } else {
379        None // Unclosed triple-quoted string
380    }
381}
382
383fn lex_raw_triple_double_string(lex: &mut logos::Lexer<Token>) -> Option<String> {
384    lex_triple_string(lex, "\"\"\"")
385}
386
387fn lex_raw_triple_single_string(lex: &mut logos::Lexer<Token>) -> Option<String> {
388    lex_triple_string(lex, "'''")
389}
390
391// === Lexer Callbacks for Bytes ===
392
393fn lex_bytes_double(lex: &mut logos::Lexer<Token>) -> Option<Vec<u8>> {
394    lex_quoted_string(lex, '"').map(|s| s.into_bytes())
395}
396
397fn lex_bytes_single(lex: &mut logos::Lexer<Token>) -> Option<Vec<u8>> {
398    lex_quoted_string(lex, '\'').map(|s| s.into_bytes())
399}
400
401fn lex_bytes_triple_double(lex: &mut logos::Lexer<Token>) -> Option<Vec<u8>> {
402    lex_triple_string(lex, "\"\"\"").map(|s| s.into_bytes())
403}
404
405fn lex_bytes_triple_single(lex: &mut logos::Lexer<Token>) -> Option<Vec<u8>> {
406    lex_triple_string(lex, "'''").map(|s| s.into_bytes())
407}
408
409fn lex_raw_bytes_double(lex: &mut logos::Lexer<Token>) -> Option<Vec<u8>> {
410    lex_raw_string(lex, '"').map(|s| s.into_bytes())
411}
412
413fn lex_raw_bytes_single(lex: &mut logos::Lexer<Token>) -> Option<Vec<u8>> {
414    lex_raw_string(lex, '\'').map(|s| s.into_bytes())
415}
416
417fn lex_raw_bytes_triple_double(lex: &mut logos::Lexer<Token>) -> Option<Vec<u8>> {
418    lex_triple_string(lex, "\"\"\"").map(|s| s.into_bytes())
419}
420
421fn lex_raw_bytes_triple_single(lex: &mut logos::Lexer<Token>) -> Option<Vec<u8>> {
422    lex_triple_string(lex, "'''").map(|s| s.into_bytes())
423}
424
425// === Public Lexer API ===
426
427/// Tokenize the input string.
428pub fn lex(input: &str) -> Result<Vec<SpannedToken>, LexError> {
429    let mut tokens = Vec::new();
430    let mut lexer = Token::lexer(input);
431
432    while let Some(result) = lexer.next() {
433        let span = lexer.span();
434        match result {
435            Ok(token) => tokens.push((token, span)),
436            Err(_) => {
437                return Err(LexError {
438                    message: format!("unexpected character '{}'", &input[span.clone()]),
439                    span,
440                })
441            }
442        }
443    }
444
445    Ok(tokens)
446}
447
#[cfg(test)]
mod tests {
    use super::*;

    /// Lex `input`, panicking on error, and drop the spans for easy comparison.
    fn lex_tokens(input: &str) -> Vec<Token> {
        lex(input)
            .unwrap()
            .into_iter()
            .map(|(tok, _)| tok)
            .collect()
    }

    #[test]
    fn lex_integers() {
        assert_eq!(lex_tokens("123"), vec![Token::Int(123)]);
        assert_eq!(lex_tokens("0"), vec![Token::Int(0)]);
        assert_eq!(lex_tokens("0x1F"), vec![Token::Int(31)]);
        assert_eq!(lex_tokens("0XAB"), vec![Token::Int(171)]);
    }

    #[test]
    fn lex_unsigned_integers() {
        assert_eq!(lex_tokens("123u"), vec![Token::UInt(123)]);
        assert_eq!(lex_tokens("123U"), vec![Token::UInt(123)]);
        assert_eq!(lex_tokens("0x1Fu"), vec![Token::UInt(31)]);
    }

    #[test]
    fn lex_floats() {
        assert_eq!(lex_tokens("1.5"), vec![Token::Float(1.5)]);
        assert_eq!(lex_tokens("1e10"), vec![Token::Float(1e10)]);
        assert_eq!(lex_tokens("1.5e-3"), vec![Token::Float(1.5e-3)]);
    }

    #[test]
    fn lex_strings() {
        assert_eq!(
            lex_tokens(r#""hello""#),
            vec![Token::String("hello".to_string())]
        );
        assert_eq!(
            lex_tokens("'world'"),
            vec![Token::String("world".to_string())]
        );
        // Escape sequences are interpreted in non-raw strings.
        assert_eq!(
            lex_tokens(r#""hello\nworld""#),
            vec![Token::String("hello\nworld".to_string())]
        );
    }

    #[test]
    fn lex_raw_strings() {
        // Raw strings keep the backslash + escape character verbatim.
        assert_eq!(
            lex_tokens(r#"r"hello\n""#),
            vec![Token::String(r"hello\n".to_string())]
        );
        assert_eq!(
            lex_tokens(r"r'hello\n'"),
            vec![Token::String(r"hello\n".to_string())]
        );
    }

    #[test]
    fn lex_triple_strings() {
        // Triple-quoted strings may span lines.
        assert_eq!(
            lex_tokens(
                r#""""multi
line""""#
            ),
            vec![Token::String("multi\nline".to_string())]
        );
    }

    #[test]
    fn lex_bytes() {
        assert_eq!(
            lex_tokens(r#"b"hello""#),
            vec![Token::Bytes(b"hello".to_vec())]
        );
        assert_eq!(
            lex_tokens("b'world'"),
            vec![Token::Bytes(b"world".to_vec())]
        );
    }

    #[test]
    fn lex_keywords() {
        assert_eq!(lex_tokens("true"), vec![Token::True]);
        assert_eq!(lex_tokens("false"), vec![Token::False]);
        assert_eq!(lex_tokens("null"), vec![Token::Null]);
        assert_eq!(lex_tokens("in"), vec![Token::In]);
    }

    #[test]
    fn lex_identifiers() {
        assert_eq!(
            lex_tokens("foo"),
            vec![Token::Ident("foo".to_string())]
        );
        assert_eq!(
            lex_tokens("_bar"),
            vec![Token::Ident("_bar".to_string())]
        );
        assert_eq!(
            lex_tokens("baz123"),
            vec![Token::Ident("baz123".to_string())]
        );
    }

    #[test]
    fn lex_operators() {
        assert_eq!(
            lex_tokens("+ - * / %"),
            vec![
                Token::Plus,
                Token::Minus,
                Token::Star,
                Token::Slash,
                Token::Percent
            ]
        );
        assert_eq!(
            lex_tokens("== != < <= > >="),
            vec![
                Token::EqEq,
                Token::Ne,
                Token::Lt,
                Token::Le,
                Token::Gt,
                Token::Ge
            ]
        );
        assert_eq!(
            lex_tokens("&& || !"),
            vec![Token::And, Token::Or, Token::Not]
        );
        assert_eq!(lex_tokens("? :"), vec![Token::Question, Token::Colon]);
    }

    #[test]
    fn lex_delimiters() {
        assert_eq!(
            lex_tokens("( ) [ ] { } . ,"),
            vec![
                Token::LParen,
                Token::RParen,
                Token::LBracket,
                Token::RBracket,
                Token::LBrace,
                Token::RBrace,
                Token::Dot,
                Token::Comma
            ]
        );
    }

    #[test]
    fn lex_expression() {
        assert_eq!(
            lex_tokens("a + b * 2"),
            vec![
                Token::Ident("a".to_string()),
                Token::Plus,
                Token::Ident("b".to_string()),
                Token::Star,
                Token::Int(2)
            ]
        );
    }

    #[test]
    fn lex_with_comments() {
        // `//` comments are skipped by the lexer and never tokenized.
        assert_eq!(
            lex_tokens("a // comment\n+ b"),
            vec![
                Token::Ident("a".to_string()),
                Token::Plus,
                Token::Ident("b".to_string())
            ]
        );
    }

    #[test]
    fn lex_unicode_escapes() {
        // \uXXXX - 4 hex digits
        assert_eq!(
            lex_tokens(r#""\u0041""#),
            vec![Token::String("A".to_string())]
        );
        assert_eq!(
            lex_tokens(r#""\u03B1""#), // Greek alpha
            vec![Token::String("α".to_string())]
        );
        // \UXXXXXXXX - 8 hex digits
        assert_eq!(
            lex_tokens(r#""\U00000041""#),
            vec![Token::String("A".to_string())]
        );
        assert_eq!(
            lex_tokens(r#""\U0001F600""#), // Emoji
            vec![Token::String("😀".to_string())]
        );
    }

    #[test]
    fn lex_octal_escapes() {
        // \DDD - 3 octal digits (000-377)
        assert_eq!(
            lex_tokens(r#""\101""#), // 'A' = 65 = 0o101
            vec![Token::String("A".to_string())]
        );
        assert_eq!(
            lex_tokens(r#""\000""#), // NUL
            vec![Token::String("\0".to_string())]
        );
        assert_eq!(
            lex_tokens(r#""\377""#), // 255 = 0o377
            vec![Token::String("\u{FF}".to_string())]
        );
    }

    #[test]
    fn lex_reserved_words() {
        // Reserved words should produce Token::Reserved, not Token::Ident
        assert_eq!(
            lex_tokens("if"),
            vec![Token::Reserved("if".to_string())]
        );
        assert_eq!(
            lex_tokens("else"),
            vec![Token::Reserved("else".to_string())]
        );
        assert_eq!(
            lex_tokens("for"),
            vec![Token::Reserved("for".to_string())]
        );
        assert_eq!(
            lex_tokens("while"),
            vec![Token::Reserved("while".to_string())]
        );
        assert_eq!(
            lex_tokens("return"),
            vec![Token::Reserved("return".to_string())]
        );
        assert_eq!(
            lex_tokens("let"),
            vec![Token::Reserved("let".to_string())]
        );
        assert_eq!(
            lex_tokens("const"),
            vec![Token::Reserved("const".to_string())]
        );
        assert_eq!(
            lex_tokens("var"),
            vec![Token::Reserved("var".to_string())]
        );
        assert_eq!(
            lex_tokens("function"),
            vec![Token::Reserved("function".to_string())]
        );
        assert_eq!(
            lex_tokens("namespace"),
            vec![Token::Reserved("namespace".to_string())]
        );
    }

    #[test]
    fn lex_integer_overflow() {
        // i64::MAX = 9223372036854775807 (fits)
        assert_eq!(
            lex_tokens("9223372036854775807"),
            vec![Token::Int(9223372036854775807)]
        );

        // i64::MAX + 1 = 9223372036854775808 (overflow - this is |i64::MIN|)
        // This specific value is needed for "-9223372036854775808" to work
        assert_eq!(
            lex_tokens("9223372036854775808"),
            vec![Token::IntOverflow("9223372036854775808".to_string())]
        );

        // Normal 19-digit number that fits
        assert_eq!(
            lex_tokens("1000000000000000000"),
            vec![Token::Int(1000000000000000000)]
        );
    }
}