/// A single lexical token produced by [`Lexer::tokenize`].
///
/// Literal and identifier variants carry their decoded value; every other
/// variant is a fixed keyword, operator or punctuation mark.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // --- Literals and identifiers ---
    Nil,
    True,
    False,
    Int(i64),
    Float(f64),
    Str(String),
    Ident(String),

    // --- Keywords ---
    And,
    Or,
    Not,
    If,
    Then,
    Else,
    ElseIf,
    End,
    While,
    Do,
    For,
    In,
    Repeat,
    Until,
    Function,
    Return,
    Local,
    Break,
    Continue,
    Class,
    /// The `self` keyword (trailing underscore because `Self` is reserved in Rust).
    Self_,
    New,
    Import,
    Export,
    Match,
    Case,
    Default,

    // --- Arithmetic and bitwise operators ---
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    /// `^`
    Caret,
    /// `#`
    Hash,
    Amp,
    Pipe,
    /// `~`
    Tilde,
    ShiftLeft,
    ShiftRight,
    SlashSlash,

    // --- Comparison operators ---
    EqEq,
    /// Produced for both `~=` and `!=`.
    NotEq,
    Lt,
    LtEq,
    Gt,
    GtEq,

    // --- Assignment operators ---
    Eq,
    PlusEq,
    MinusEq,
    StarEq,
    SlashEq,

    // --- Miscellaneous operators ---
    DotDot,
    DotDotDot,
    /// `->`
    Arrow,
    Bang,

    // --- Delimiters and punctuation ---
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Comma,
    Semicolon,
    Colon,
    ColonColon,
    Dot,

    /// End-of-input marker; always the last token emitted.
    Eof,
}
37
/// Source position of a token's first character.
///
/// Both fields are 1-based and count characters, not bytes. The type is a
/// pair of `u32`s, so it is `Copy` and comparable for cheap passing and
/// straightforward assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// 1-based line number.
    pub line: u32,
    /// 1-based column number within the line.
    pub column: u32,
}
43
44#[derive(Debug, Clone)]
45pub struct TokenWithSpan {
46 pub token: Token,
47 pub span: Span,
48}
49
/// Character-level scanner that turns source text into tokens.
///
/// The input is held as a vector of `char`s so that lookahead by index is
/// O(1) and positions are counted in characters rather than bytes.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The full input, one element per character.
    source: Vec<char>,
    /// Index into `source` of the next character to read.
    pos: usize,
    /// 1-based line of the next character to read.
    line: u32,
    /// 1-based column of the next character to read.
    column: u32,
}
57
58impl Lexer {
59 pub fn new(source: &str) -> Self {
60 Self { source: source.chars().collect(), pos: 0, line: 1, column: 1 }
61 }
62
63 fn peek(&self) -> Option<char> { self.source.get(self.pos).copied() }
64 fn peek2(&self) -> Option<char> { self.source.get(self.pos + 1).copied() }
65
66 fn advance(&mut self) -> Option<char> {
67 let c = self.source.get(self.pos).copied();
68 if let Some(ch) = c {
69 self.pos += 1;
70 if ch == '\n' { self.line += 1; self.column = 1; }
71 else { self.column += 1; }
72 }
73 c
74 }
75
76 fn span(&self) -> Span { Span { line: self.line, column: self.column } }
77
78 fn skip_whitespace_and_comments(&mut self) {
79 loop {
80 while self.peek().map(|c| c.is_whitespace()).unwrap_or(false) {
82 self.advance();
83 }
84 if self.peek() == Some('-') && self.peek2() == Some('-') {
86 self.advance(); self.advance();
87 while self.peek().map(|c| c != '\n').unwrap_or(false) { self.advance(); }
88 continue;
89 }
90 if self.peek() == Some('/') && self.peek2() == Some('/') {
91 self.advance(); self.advance();
92 while self.peek().map(|c| c != '\n').unwrap_or(false) { self.advance(); }
93 continue;
94 }
95 if self.peek() == Some('/') && self.peek2() == Some('*') {
97 self.advance(); self.advance();
98 while self.pos + 1 < self.source.len() {
99 if self.peek() == Some('*') && self.peek2() == Some('/') {
100 self.advance(); self.advance(); break;
101 }
102 self.advance();
103 }
104 continue;
105 }
106 break;
107 }
108 }
109
110 fn read_string(&mut self, delim: char) -> String {
111 let mut s = String::new();
112 while let Some(c) = self.peek() {
113 if c == delim { self.advance(); break; }
114 if c == '\\' {
115 self.advance();
116 match self.advance() {
117 Some('n') => s.push('\n'),
118 Some('t') => s.push('\t'),
119 Some('r') => s.push('\r'),
120 Some('\\') => s.push('\\'),
121 Some('\'') => s.push('\''),
122 Some('"') => s.push('"'),
123 Some('0') => s.push('\0'),
124 Some(x) => { s.push('\\'); s.push(x); }
125 None => break,
126 }
127 } else {
128 s.push(c);
129 self.advance();
130 }
131 }
132 s
133 }
134
135 fn read_number(&mut self, first: char) -> Token {
136 let mut num = first.to_string();
137 let mut is_float = false;
138 while let Some(c) = self.peek() {
139 if c.is_ascii_digit() { num.push(c); self.advance(); }
140 else if c == '.' && !is_float && self.peek2().map(|n| n.is_ascii_digit()).unwrap_or(false) {
141 is_float = true; num.push(c); self.advance();
142 }
143 else if (c == 'e' || c == 'E') && !num.contains('e') && !num.contains('E') {
144 is_float = true; num.push(c); self.advance();
145 if self.peek() == Some('+') || self.peek() == Some('-') {
146 if let Some(sign) = self.advance() { num.push(sign); }
147 }
148 }
149 else { break; }
150 }
151 if is_float {
152 Token::Float(num.parse().unwrap_or(0.0))
153 } else {
154 Token::Int(num.parse().unwrap_or(0))
155 }
156 }
157
158 fn read_ident(&mut self, first: char) -> Token {
159 let mut ident = first.to_string();
160 while let Some(c) = self.peek() {
161 if c.is_alphanumeric() || c == '_' { ident.push(c); self.advance(); }
162 else { break; }
163 }
164 match ident.as_str() {
165 "nil" => Token::Nil,
166 "true" => Token::True,
167 "false" => Token::False,
168 "and" => Token::And,
169 "or" => Token::Or,
170 "not" => Token::Not,
171 "if" => Token::If,
172 "then" => Token::Then,
173 "else" => Token::Else,
174 "elseif" => Token::ElseIf,
175 "end" => Token::End,
176 "while" => Token::While,
177 "do" => Token::Do,
178 "for" => Token::For,
179 "in" => Token::In,
180 "repeat" => Token::Repeat,
181 "until" => Token::Until,
182 "function" => Token::Function,
183 "return" => Token::Return,
184 "local" => Token::Local,
185 "break" => Token::Break,
186 "continue" => Token::Continue,
187 "class" => Token::Class,
188 "self" => Token::Self_,
189 "new" => Token::New,
190 "import" => Token::Import,
191 "export" => Token::Export,
192 "match" => Token::Match,
193 "case" => Token::Case,
194 "default" => Token::Default,
195 _ => Token::Ident(ident),
196 }
197 }
198
199 pub fn tokenize(&mut self) -> Vec<TokenWithSpan> {
200 let mut tokens = Vec::new();
201 loop {
202 self.skip_whitespace_and_comments();
203 let span = self.span();
204 let ch = match self.advance() {
205 Some(c) => c,
206 None => { tokens.push(TokenWithSpan { token: Token::Eof, span }); break; }
207 };
208
209 let token = match ch {
210 '+' => { if self.peek() == Some('=') { self.advance(); Token::PlusEq } else { Token::Plus } }
211 '-' => { if self.peek() == Some('=') { self.advance(); Token::MinusEq } else if self.peek() == Some('>') { self.advance(); Token::Arrow } else { Token::Minus } }
212 '*' => { if self.peek() == Some('=') { self.advance(); Token::StarEq } else { Token::Star } }
213 '/' => { if self.peek() == Some('=') { self.advance(); Token::SlashEq } else if self.peek() == Some('/') { self.advance(); Token::SlashSlash } else { Token::Slash } }
214 '%' => Token::Percent,
215 '^' => Token::Caret,
216 '#' => Token::Hash,
217 '&' => Token::Amp,
218 '|' => Token::Pipe,
219 '~' => { if self.peek() == Some('=') { self.advance(); Token::NotEq } else { Token::Tilde } }
220 '<' => { if self.peek() == Some('=') { self.advance(); Token::LtEq } else if self.peek() == Some('<') { self.advance(); Token::ShiftLeft } else { Token::Lt } }
221 '>' => { if self.peek() == Some('=') { self.advance(); Token::GtEq } else if self.peek() == Some('>') { self.advance(); Token::ShiftRight } else { Token::Gt } }
222 '=' => { if self.peek() == Some('=') { self.advance(); Token::EqEq } else { Token::Eq } }
223 '!' => { if self.peek() == Some('=') { self.advance(); Token::NotEq } else { Token::Bang } }
224 '.' => {
225 if self.peek() == Some('.') {
226 self.advance();
227 if self.peek() == Some('.') { self.advance(); Token::DotDotDot }
228 else { Token::DotDot }
229 } else { Token::Dot }
230 }
231 ':' => { if self.peek() == Some(':') { self.advance(); Token::ColonColon } else { Token::Colon } }
232 '(' => Token::LParen,
233 ')' => Token::RParen,
234 '{' => Token::LBrace,
235 '}' => Token::RBrace,
236 '[' => Token::LBracket,
237 ']' => Token::RBracket,
238 ',' => Token::Comma,
239 ';' => Token::Semicolon,
240 '\'' | '"' => Token::Str(self.read_string(ch)),
241 '`' => Token::Str(self.read_string('`')),
242 c if c.is_ascii_digit() => self.read_number(c),
243 c if c.is_alphabetic() || c == '_' => self.read_ident(c),
244 _ => continue,
245 };
246 tokens.push(TokenWithSpan { token, span });
247 }
248 tokens
249 }
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `src` and returns just the tokens, dropping span information.
    fn lex(src: &str) -> Vec<Token> {
        Lexer::new(src)
            .tokenize()
            .into_iter()
            .map(|t| t.token)
            .collect()
    }

    /// Shorthand for building an identifier token.
    fn ident(name: &str) -> Token {
        Token::Ident(name.to_string())
    }

    // Each test pins the complete token sequence (including the trailing
    // `Eof`) so that ordering mistakes and stray tokens are caught, not just
    // missing ones.

    #[test]
    fn test_lex_simple_assign() {
        assert_eq!(
            lex("local x = 42"),
            vec![Token::Local, ident("x"), Token::Eq, Token::Int(42), Token::Eof]
        );
    }

    #[test]
    fn test_lex_string() {
        assert_eq!(
            lex(r#"local s = "hello world""#),
            vec![
                Token::Local,
                ident("s"),
                Token::Eq,
                Token::Str("hello world".to_string()),
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_lex_float() {
        let toks = lex("3.14");
        assert!(matches!(
            toks.as_slice(),
            [Token::Float(v), Token::Eof] if (*v - 3.14).abs() < 1e-6
        ));
    }

    #[test]
    fn test_lex_operators() {
        assert_eq!(
            lex("a == b ~= c <= d >= e"),
            vec![
                ident("a"),
                Token::EqEq,
                ident("b"),
                Token::NotEq,
                ident("c"),
                Token::LtEq,
                ident("d"),
                Token::GtEq,
                ident("e"),
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_lex_keywords() {
        assert_eq!(
            lex("if x then return end"),
            vec![
                Token::If,
                ident("x"),
                Token::Then,
                Token::Return,
                Token::End,
                Token::Eof,
            ]
        );
    }

    #[test]
    fn test_lex_comment_skip() {
        assert_eq!(
            lex("local x = 1 -- this is a comment\nlocal y = 2"),
            vec![
                Token::Local,
                ident("x"),
                Token::Eq,
                Token::Int(1),
                Token::Local,
                ident("y"),
                Token::Eq,
                Token::Int(2),
                Token::Eof,
            ]
        );
    }
}