/// A single lexical token.
///
/// The data-carrying variants hold a decoded literal value or the text of an
/// identifier; every other variant stands for one fixed keyword, operator or
/// punctuation lexeme, shown in its doc comment.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    // ----- literals and names -----
    /// `nil`
    Nil,
    /// `true`
    True,
    /// `false`
    False,
    /// Integer literal.
    Int(i64),
    /// Floating-point literal.
    Float(f64),
    /// String literal contents.
    Str(String),
    /// Any word that is not a keyword.
    Ident(String),

    // ----- keywords -----
    /// `and`
    And,
    /// `or`
    Or,
    /// `not`
    Not,
    /// `if`
    If,
    /// `then`
    Then,
    /// `else`
    Else,
    /// `elseif`
    ElseIf,
    /// `end`
    End,
    /// `while`
    While,
    /// `do`
    Do,
    /// `for`
    For,
    /// `in`
    In,
    /// `repeat`
    Repeat,
    /// `until`
    Until,
    /// `function`
    Function,
    /// `return`
    Return,
    /// `local`
    Local,
    /// `break`
    Break,
    /// `continue`
    Continue,
    /// `class`
    Class,
    /// `self` (trailing underscore because `Self` is reserved in Rust)
    Self_,
    /// `new`
    New,
    /// `import`
    Import,
    /// `export`
    Export,
    /// `match`
    Match,
    /// `case`
    Case,
    /// `default`
    Default,

    // ----- operators -----
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Caret,
    /// `#`
    Hash,
    /// `&`
    Amp,
    /// `|`
    Pipe,
    /// `~`
    Tilde,
    /// `<<`
    ShiftLeft,
    /// `>>`
    ShiftRight,
    /// `//`
    SlashSlash,
    /// `==`
    EqEq,
    /// `~=` or `!=`
    NotEq,
    /// `<`
    Lt,
    /// `<=`
    LtEq,
    /// `>`
    Gt,
    /// `>=`
    GtEq,
    /// `=`
    Eq,
    /// `+=`
    PlusEq,
    /// `-=`
    MinusEq,
    /// `*=`
    StarEq,
    /// `/=`
    SlashEq,
    /// `..`
    DotDot,
    /// `...`
    DotDotDot,
    /// `->`
    Arrow,
    /// `=>`
    FatArrow,
    /// `!`
    Bang,
    /// `?`
    Question,

    // ----- delimiters and punctuation -----
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `,`
    Comma,
    /// `;`
    Semicolon,
    /// `:`
    Colon,
    /// `::`
    ColonColon,
    /// `.`
    Dot,

    /// End-of-input marker.
    Eof,
}
37
/// A position in the source text, expressed as a line and a column.
///
/// The type is two plain integers, so it is `Copy` and comparable: spans can
/// be passed around by value and asserted on directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Line number of the position.
    pub line: u32,
    /// Column number of the position within its line.
    pub column: u32,
}
43
44#[derive(Debug, Clone)]
45pub struct TokenWithSpan {
46 pub token: Token,
47 pub span: Span,
48}
49
/// Character-level scanner state used to turn source text into tokens.
///
/// `Debug` and `Clone` are derived so the scanner can be inspected in
/// diagnostics and snapshotted.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The input, stored as individual characters.
    source: Vec<char>,
    /// Index into `source` of the next character to read.
    pos: usize,
    /// Line number of the next character to read.
    line: u32,
    /// Column number of the next character to read.
    column: u32,
}
57
58impl Lexer {
59 pub fn new(source: &str) -> Self {
60 Self { source: source.chars().collect(), pos: 0, line: 1, column: 1 }
61 }
62
63 fn peek(&self) -> Option<char> { self.source.get(self.pos).copied() }
64 fn peek2(&self) -> Option<char> { self.source.get(self.pos + 1).copied() }
65
66 fn advance(&mut self) -> Option<char> {
67 let c = self.source.get(self.pos).copied();
68 if let Some(ch) = c {
69 self.pos += 1;
70 if ch == '\n' { self.line += 1; self.column = 1; }
71 else { self.column += 1; }
72 }
73 c
74 }
75
76 fn span(&self) -> Span { Span { line: self.line, column: self.column } }
77
78 fn skip_whitespace_and_comments(&mut self) {
79 loop {
80 while self.peek().map(|c| c.is_whitespace()).unwrap_or(false) {
82 self.advance();
83 }
84 if self.peek() == Some('-') && self.peek2() == Some('-') {
86 self.advance(); self.advance();
87 while self.peek().map(|c| c != '\n').unwrap_or(false) { self.advance(); }
88 continue;
89 }
90 if self.peek() == Some('/') && self.peek2() == Some('/') {
91 self.advance(); self.advance();
92 while self.peek().map(|c| c != '\n').unwrap_or(false) { self.advance(); }
93 continue;
94 }
95 if self.peek() == Some('/') && self.peek2() == Some('*') {
97 self.advance(); self.advance();
98 while self.pos + 1 < self.source.len() {
99 if self.peek() == Some('*') && self.peek2() == Some('/') {
100 self.advance(); self.advance(); break;
101 }
102 self.advance();
103 }
104 continue;
105 }
106 break;
107 }
108 }
109
110 fn read_string(&mut self, delim: char) -> String {
111 let mut s = String::new();
112 while let Some(c) = self.peek() {
113 if c == delim { self.advance(); break; }
114 if c == '\\' {
115 self.advance();
116 match self.advance() {
117 Some('n') => s.push('\n'),
118 Some('t') => s.push('\t'),
119 Some('r') => s.push('\r'),
120 Some('\\') => s.push('\\'),
121 Some('\'') => s.push('\''),
122 Some('"') => s.push('"'),
123 Some('0') => s.push('\0'),
124 Some(x) => { s.push('\\'); s.push(x); }
125 None => break,
126 }
127 } else {
128 s.push(c);
129 self.advance();
130 }
131 }
132 s
133 }
134
135 fn read_number(&mut self, first: char) -> Token {
136 let mut num = first.to_string();
137 let mut is_float = false;
138 if first == '0' && (self.peek() == Some('x') || self.peek() == Some('X')) {
140 self.advance(); let mut hex = String::new();
142 while let Some(c) = self.peek() {
143 if c.is_ascii_hexdigit() { hex.push(c); self.advance(); }
144 else { break; }
145 }
146 return Token::Int(i64::from_str_radix(&hex, 16).unwrap_or(0));
147 }
148 while let Some(c) = self.peek() {
149 if c.is_ascii_digit() { num.push(c); self.advance(); }
150 else if c == '.' && !is_float && self.peek2().map(|n| n.is_ascii_digit()).unwrap_or(false) {
151 is_float = true; num.push(c); self.advance();
152 }
153 else if (c == 'e' || c == 'E') && !num.contains('e') && !num.contains('E') {
154 is_float = true; num.push(c); self.advance();
155 if self.peek() == Some('+') || self.peek() == Some('-') {
156 if let Some(sign) = self.advance() { num.push(sign); }
157 }
158 }
159 else { break; }
160 }
161 if is_float {
162 Token::Float(num.parse().unwrap_or(0.0))
163 } else {
164 Token::Int(num.parse().unwrap_or(0))
165 }
166 }
167
168 fn read_ident(&mut self, first: char) -> Token {
169 let mut ident = first.to_string();
170 while let Some(c) = self.peek() {
171 if c.is_alphanumeric() || c == '_' { ident.push(c); self.advance(); }
172 else { break; }
173 }
174 match ident.as_str() {
175 "nil" => Token::Nil,
176 "true" => Token::True,
177 "false" => Token::False,
178 "and" => Token::And,
179 "or" => Token::Or,
180 "not" => Token::Not,
181 "if" => Token::If,
182 "then" => Token::Then,
183 "else" => Token::Else,
184 "elseif" => Token::ElseIf,
185 "end" => Token::End,
186 "while" => Token::While,
187 "do" => Token::Do,
188 "for" => Token::For,
189 "in" => Token::In,
190 "repeat" => Token::Repeat,
191 "until" => Token::Until,
192 "function" => Token::Function,
193 "return" => Token::Return,
194 "local" => Token::Local,
195 "break" => Token::Break,
196 "continue" => Token::Continue,
197 "class" => Token::Class,
198 "self" => Token::Self_,
199 "new" => Token::New,
200 "import" => Token::Import,
201 "export" => Token::Export,
202 "match" => Token::Match,
203 "case" => Token::Case,
204 "default" => Token::Default,
205 _ => Token::Ident(ident),
206 }
207 }
208
209 pub fn tokenize(&mut self) -> Vec<TokenWithSpan> {
210 let mut tokens = Vec::new();
211 loop {
212 self.skip_whitespace_and_comments();
213 let span = self.span();
214 let ch = match self.advance() {
215 Some(c) => c,
216 None => { tokens.push(TokenWithSpan { token: Token::Eof, span }); break; }
217 };
218
219 let token = match ch {
220 '+' => { if self.peek() == Some('=') { self.advance(); Token::PlusEq } else { Token::Plus } }
221 '-' => { if self.peek() == Some('=') { self.advance(); Token::MinusEq } else if self.peek() == Some('>') { self.advance(); Token::Arrow } else { Token::Minus } }
222 '*' => { if self.peek() == Some('=') { self.advance(); Token::StarEq } else { Token::Star } }
223 '/' => { if self.peek() == Some('=') { self.advance(); Token::SlashEq } else if self.peek() == Some('/') { self.advance(); Token::SlashSlash } else { Token::Slash } }
224 '%' => Token::Percent,
225 '^' => Token::Caret,
226 '#' => Token::Hash,
227 '&' => Token::Amp,
228 '|' => Token::Pipe,
229 '~' => { if self.peek() == Some('=') { self.advance(); Token::NotEq } else { Token::Tilde } }
230 '<' => { if self.peek() == Some('=') { self.advance(); Token::LtEq } else if self.peek() == Some('<') { self.advance(); Token::ShiftLeft } else { Token::Lt } }
231 '>' => { if self.peek() == Some('=') { self.advance(); Token::GtEq } else if self.peek() == Some('>') { self.advance(); Token::ShiftRight } else { Token::Gt } }
232 '=' => { if self.peek() == Some('=') { self.advance(); Token::EqEq } else if self.peek() == Some('>') { self.advance(); Token::FatArrow } else { Token::Eq } }
233 '!' => { if self.peek() == Some('=') { self.advance(); Token::NotEq } else { Token::Bang } }
234 '.' => {
235 if self.peek() == Some('.') {
236 self.advance();
237 if self.peek() == Some('.') { self.advance(); Token::DotDotDot }
238 else { Token::DotDot }
239 } else { Token::Dot }
240 }
241 ':' => { if self.peek() == Some(':') { self.advance(); Token::ColonColon } else { Token::Colon } }
242 '(' => Token::LParen,
243 ')' => Token::RParen,
244 '{' => Token::LBrace,
245 '}' => Token::RBrace,
246 '[' => Token::LBracket,
247 ']' => Token::RBracket,
248 ',' => Token::Comma,
249 ';' => Token::Semicolon,
250 '?' => Token::Question,
251 '\'' | '"' => Token::Str(self.read_string(ch)),
252 '`' => Token::Str(self.read_string('`')),
253 c if c.is_ascii_digit() => self.read_number(c),
254 c if c.is_alphabetic() || c == '_' => self.read_ident(c),
255 _ => continue,
256 };
257 tokens.push(TokenWithSpan { token, span });
258 }
259 tokens
260 }
261}
262
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the lexer over `src` and returns just the tokens, without spans.
    fn lex(src: &str) -> Vec<Token> {
        Lexer::new(src)
            .tokenize()
            .into_iter()
            .map(|spanned| spanned.token)
            .collect()
    }

    /// Asserts that every token in `expected` occurs somewhere in `actual`.
    fn assert_all_present(actual: &[Token], expected: &[Token]) {
        for wanted in expected {
            assert!(actual.contains(wanted));
        }
    }

    #[test]
    fn test_lex_simple_assign() {
        let toks = lex("local x = 42");
        assert_all_present(
            &toks,
            &[
                Token::Local,
                Token::Ident("x".to_string()),
                Token::Eq,
                Token::Int(42),
            ],
        );
    }

    #[test]
    fn test_lex_string() {
        let toks = lex(r#"local s = "hello world""#);
        assert_all_present(&toks, &[Token::Str("hello world".to_string())]);
    }

    #[test]
    fn test_lex_float() {
        let toks = lex("3.14");
        // Compare with a tolerance rather than exact float equality.
        let found = toks.iter().any(|t| match t {
            Token::Float(v) => (*v - 3.14).abs() < 1e-6,
            _ => false,
        });
        assert!(found);
    }

    #[test]
    fn test_lex_operators() {
        let toks = lex("a == b ~= c <= d >= e");
        assert_all_present(
            &toks,
            &[Token::EqEq, Token::NotEq, Token::LtEq, Token::GtEq],
        );
    }

    #[test]
    fn test_lex_keywords() {
        let toks = lex("if x then return end");
        assert_all_present(
            &toks,
            &[Token::If, Token::Then, Token::Return, Token::End],
        );
    }

    #[test]
    fn test_lex_comment_skip() {
        let toks = lex("local x = 1 -- this is a comment\nlocal y = 2");
        // No word from the comment text may surface as an identifier.
        let leaked = Token::Ident("this".to_string());
        assert!(toks.iter().all(|t| *t != leaked));
    }
}