luna_core/frontend/token.rs
1//! Lexical tokens produced by the lexer.
2
3use crate::frontend::span::Span;
4use crate::version::LuaVersion;
5
6/// One lexical token produced by the lexer.
7#[derive(Clone, PartialEq, Debug)]
8pub enum Token {
9 // keywords
10 /// `and` keyword.
11 And,
12 /// `break` keyword.
13 Break,
14 /// `do` keyword.
15 Do,
16 /// `else` keyword.
17 Else,
18 /// `elseif` keyword.
19 Elseif,
20 /// `end` keyword.
21 End,
22 /// `false` keyword.
23 False,
24 /// `for` keyword.
25 For,
26 /// `function` keyword.
27 Function,
28 /// 5.5 `global` keyword.
29 Global,
30 /// `goto` keyword.
31 Goto,
32 /// `if` keyword.
33 If,
34 /// `in` keyword.
35 In,
36 /// `local` keyword.
37 Local,
38 /// `nil` keyword.
39 Nil,
40 /// `not` keyword.
41 Not,
42 /// `or` keyword.
43 Or,
44 /// `repeat` keyword.
45 Repeat,
46 /// `return` keyword.
47 Return,
48 /// `then` keyword.
49 Then,
50 /// `true` keyword.
51 True,
52 /// `until` keyword.
53 Until,
54 /// `while` keyword.
55 While,
56 // symbols
57 /// `+` symbol.
58 Plus,
59 /// `-` symbol.
60 Minus,
61 /// `*` symbol.
62 Star,
63 /// `/` symbol.
64 Slash,
65 /// `//` symbol (floor division).
66 DSlash,
67 /// `%` symbol.
68 Percent,
69 /// `^` symbol.
70 Caret,
71 /// `#` symbol.
72 Hash,
73 /// `&` symbol.
74 Amp,
75 /// `~` symbol (bitwise xor / unary bnot).
76 Tilde,
77 /// `|` symbol.
78 Pipe,
79 /// `<<` symbol.
80 Shl,
81 /// `>>` symbol.
82 Shr,
83 /// `==` symbol.
84 Eq,
85 /// `~=` symbol.
86 Ne,
87 /// `<=` symbol.
88 Le,
89 /// `>=` symbol.
90 Ge,
91 /// `<` symbol.
92 Lt,
93 /// `>` symbol.
94 Gt,
95 /// `=` symbol (assignment).
96 Assign,
97 /// `(` symbol.
98 LParen,
99 /// `)` symbol.
100 RParen,
101 /// `{` symbol.
102 LBrace,
103 /// `}` symbol.
104 RBrace,
105 /// `[` symbol.
106 LBracket,
107 /// `]` symbol.
108 RBracket,
109 /// `::` symbol (label delimiter).
110 DColon,
111 /// `;` symbol.
112 Semi,
113 /// `:` symbol.
114 Colon,
115 /// `,` symbol.
116 Comma,
117 /// `.` symbol.
118 Dot,
119 /// `..` symbol (concatenation).
120 Concat,
121 /// `...` symbol (vararg).
122 Ellipsis,
123 // literals
124 /// Integer literal.
125 Int(
126 /// Decoded 64-bit signed value.
127 i64,
128 ),
129 /// Floating-point literal.
130 Float(
131 /// Decoded IEEE-754 double value.
132 f64,
133 ),
134 /// String literal (raw bytes; Lua strings are 8-bit clean).
135 Str(
136 /// Decoded byte contents.
137 Vec<u8>,
138 ),
139 /// Identifier.
140 Name(
141 /// Source text of the identifier.
142 Box<str>,
143 ),
144 /// MacroLua `@` sigil (v1.3 Phase ML). Lexed only when
145 /// `version.is_macro_lua()`; PUC 5.1-5.5 sources continue to
146 /// error `unexpected symbol near '@'`.
147 At,
148 /// MacroLua explicit quote-block opener `@{`. Lexed only when
149 /// `version.is_macro_lua()`; pairs with [`Token::MacroBraceClose`].
150 MacroBraceOpen,
151 /// MacroLua explicit quote-block closer `}@`. Lexed only when
152 /// `version.is_macro_lua()`; pairs with [`Token::MacroBraceOpen`].
153 MacroBraceClose,
154 /// Synthetic token produced by the macro expander pre-pass: a
155 /// captured token run (the body of a `@quote{...}` or `@{...}@`
156 /// block). The lexer never emits this. After the expander runs
157 /// it splices these back into the stream as raw token sequences
158 /// before the parser proper sees them.
159 MacroQuote(
160 /// Captured token run.
161 Box<[TokenInfo]>,
162 ),
163 /// End-of-file marker.
164 Eof,
165}
166
167impl Token {
168 /// The near-token shown in `... near <tok>` error messages. Every
169 /// concrete token (names, literals, symbols, keywords) is wrapped in
170 /// single quotes per PUC's `txtToken`/`luaX_token2str`. The
171 /// pseudo-token `<eof>` is unquoted under 5.2+ — those suites'
172 /// `checksyntax` has a `^<%a` guard that *adds* quotes only when the
173 /// expected token doesn't already start with `<`. 5.1's `checksyntax`
174 /// has no such guard and unconditionally wraps the expected token, so
175 /// `<eof>` must come through quoted there to match the
176 /// `... near '<eof>'` shape (5.1 errors.lua :20-:21 pin this; PUC's
177 /// 5.1 luaX_lexerror output is the same).
178 pub fn describe(&self, src: &[u8], span: Span, version: LuaVersion) -> String {
179 match self {
180 Token::Eof if version <= LuaVersion::Lua51 => "'<eof>'".to_string(),
181 Token::Eof => "<eof>".to_string(),
182 _ => format!("'{}'", String::from_utf8_lossy(span.slice(src))),
183 }
184 }
185}
186
187/// A token plus where it came from.
188#[derive(Clone, Debug, PartialEq)]
189pub struct TokenInfo {
190 /// The lexical token.
191 pub tok: Token,
192 /// Byte range in source.
193 pub span: Span,
194 /// 1-based source line where the token starts.
195 pub line: u32,
196}