1use crate::{kind::LuaSyntaxKind, language::LuaLanguage};
6use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
7
8type State<S> = LexerState<S, LuaLanguage>;
9
10#[derive(Clone)]
12pub struct LuaLexer<'config> {
13 config: &'config LuaLanguage,
14}
15
16impl<'config> LuaLexer<'config> {
17 pub fn new(config: &'config LuaLanguage) -> Self {
19 Self { config }
20 }
21
22 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
24 let start_pos = state.get_position();
25
26 while let Some(ch) = state.peek() {
27 if ch == ' ' || ch == '\t' {
28 state.advance(ch.len_utf8());
29 }
30 else {
31 break;
32 }
33 }
34
35 if state.get_position() > start_pos {
36 state.add_token(LuaSyntaxKind::Whitespace, start_pos, state.get_position());
37 true
38 }
39 else {
40 false
41 }
42 }
43
44 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
46 let start_pos = state.get_position();
47
48 if let Some('\n') = state.peek() {
49 state.advance(1);
50 state.add_token(LuaSyntaxKind::Whitespace, start_pos, state.get_position());
51 true
52 }
53 else if let Some('\r') = state.peek() {
54 state.advance(1);
55 if let Some('\n') = state.peek() {
56 state.advance(1);
57 }
58 state.add_token(LuaSyntaxKind::Whitespace, start_pos, state.get_position());
59 true
60 }
61 else {
62 false
63 }
64 }
65
66 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
68 let start_pos = state.get_position();
69
70 if let Some('-') = state.current() {
71 if let Some('-') = state.peek() {
72 state.advance(1); state.advance(1); if let Some('[') = state.current() {
77 if let Some('[') = state.peek() {
78 state.advance(1); state.advance(1); while let Some(ch) = state.current() {
83 if ch == ']' {
84 if let Some(']') = state.peek() {
85 state.advance(1); state.advance(1); break;
88 }
89 }
90 state.advance(ch.len_utf8());
91 }
92 }
93 else {
94 while let Some(ch) = state.current() {
96 if ch == '\n' || ch == '\r' {
97 break;
98 }
99 state.advance(ch.len_utf8());
100 }
101 }
102 }
103 else {
104 while let Some(ch) = state.current() {
106 if ch == '\n' || ch == '\r' {
107 break;
108 }
109 state.advance(ch.len_utf8());
110 }
111 }
112
113 state.add_token(LuaSyntaxKind::Comment, start_pos, state.get_position());
114 true
115 }
116 else {
117 false
118 }
119 }
120 else {
121 false
122 }
123 }
124
125 fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
127 let start_pos = state.get_position();
128
129 if let Some(quote_char) = state.current() {
130 if quote_char == '"' || quote_char == '\'' {
131 state.advance(1); let mut escaped = false;
134 while let Some(ch) = state.current() {
135 if escaped {
136 escaped = false;
137 state.advance(ch.len_utf8());
138 }
139 else if ch == '\\' {
140 escaped = true;
141 state.advance(1);
142 }
143 else if ch == quote_char {
144 state.advance(1); break;
146 }
147 else if ch == '\n' || ch == '\r' {
148 break;
150 }
151 else {
152 state.advance(ch.len_utf8());
153 }
154 }
155
156 state.add_token(LuaSyntaxKind::String, start_pos, state.get_position());
157 true
158 }
159 else if quote_char == '[' {
160 if let Some('[') = state.peek() {
162 state.advance(1); state.advance(1); while let Some(ch) = state.current() {
167 if ch == ']' {
168 if let Some(']') = state.peek() {
169 state.advance(1); state.advance(1); break;
172 }
173 }
174 state.advance(ch.len_utf8());
175 }
176
177 state.add_token(LuaSyntaxKind::String, start_pos, state.get_position());
178 true
179 }
180 else {
181 false
182 }
183 }
184 else {
185 false
186 }
187 }
188 else {
189 false
190 }
191 }
192
193 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
195 let start_pos = state.get_position();
196
197 if let Some(ch) = state.current() {
198 if ch.is_ascii_digit() {
199 if ch == '0' {
201 if let Some(next_ch) = state.peek() {
202 if next_ch == 'x' || next_ch == 'X' {
203 state.advance(1); state.advance(1); while let Some(hex_ch) = state.current() {
208 if hex_ch.is_ascii_hexdigit() {
209 state.advance(1);
210 }
211 else {
212 break;
213 }
214 }
215
216 state.add_token(LuaSyntaxKind::Number, start_pos, state.get_position());
217 return true;
218 }
219 }
220 }
221
222 let mut has_dot = false;
224 let mut has_exp = false;
225
226 while let Some(num_ch) = state.current() {
227 if num_ch.is_ascii_digit() {
228 state.advance(1);
229 }
230 else if num_ch == '.' && !has_dot && !has_exp {
231 has_dot = true;
232 state.advance(1);
233 }
234 else if (num_ch == 'e' || num_ch == 'E') && !has_exp {
235 has_exp = true;
236 state.advance(1);
237
238 if let Some(sign_ch) = state.current() {
240 if sign_ch == '+' || sign_ch == '-' {
241 state.advance(1);
242 }
243 }
244 }
245 else {
246 break;
247 }
248 }
249
250 state.add_token(LuaSyntaxKind::Number, start_pos, state.get_position());
251 true
252 }
253 else {
254 false
255 }
256 }
257 else {
258 false
259 }
260 }
261
262 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
264 if let Some(ch) = state.current() {
265 if ch.is_ascii_alphabetic() || ch == '_' {
266 let range = state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
267 let text = state.get_text_in(range);
269 let token_kind = self.keyword_or_identifier(text);
270 state.add_token(token_kind, range.start, range.end);
271 true
272 }
273 else {
274 false
275 }
276 }
277 else {
278 false
279 }
280 }
281
282 fn keyword_or_identifier(&self, text: &str) -> LuaSyntaxKind {
284 match text {
285 "and" => LuaSyntaxKind::And,
286 "break" => LuaSyntaxKind::Break,
287 "do" => LuaSyntaxKind::Do,
288 "else" => LuaSyntaxKind::Else,
289 "elseif" => LuaSyntaxKind::Elseif,
290 "end" => LuaSyntaxKind::End,
291 "false" => LuaSyntaxKind::False,
292 "for" => LuaSyntaxKind::For,
293 "function" => LuaSyntaxKind::Function,
294 "goto" => LuaSyntaxKind::Goto,
295 "if" => LuaSyntaxKind::If,
296 "in" => LuaSyntaxKind::In,
297 "local" => LuaSyntaxKind::Local,
298 "nil" => LuaSyntaxKind::Nil,
299 "not" => LuaSyntaxKind::Not,
300 "or" => LuaSyntaxKind::Or,
301 "repeat" => LuaSyntaxKind::Repeat,
302 "return" => LuaSyntaxKind::Return,
303 "then" => LuaSyntaxKind::Then,
304 "true" => LuaSyntaxKind::True,
305 "until" => LuaSyntaxKind::Until,
306 "while" => LuaSyntaxKind::While,
307 _ => LuaSyntaxKind::Identifier,
308 }
309 }
310
311 fn lex_operator_or_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
313 let start_pos = state.get_position();
314
315 if let Some(ch) = state.peek() {
316 let token_kind = match ch {
317 '=' => {
318 state.advance(1);
319 if let Some('=') = state.peek() {
320 state.advance(1);
321 LuaSyntaxKind::EqEq
322 }
323 else {
324 LuaSyntaxKind::Eq
325 }
326 }
327 '~' => {
328 state.advance(1);
329 if let Some('=') = state.peek() {
330 state.advance(1);
331 LuaSyntaxKind::TildeEq
332 }
333 else {
334 LuaSyntaxKind::Tilde
335 }
336 }
337 '<' => {
338 state.advance(1);
339 if let Some('=') = state.peek() {
340 state.advance(1);
341 LuaSyntaxKind::LtEq
342 }
343 else if let Some('<') = state.peek() {
344 state.advance(1);
345 LuaSyntaxKind::LtLt
346 }
347 else {
348 LuaSyntaxKind::Lt
349 }
350 }
351 '>' => {
352 state.advance(1);
353 if let Some('=') = state.peek() {
354 state.advance(1);
355 LuaSyntaxKind::GtEq
356 }
357 else if let Some('>') = state.peek() {
358 state.advance(1);
359 LuaSyntaxKind::GtGt
360 }
361 else {
362 LuaSyntaxKind::Gt
363 }
364 }
365 '.' => {
366 state.advance(1);
367 if let Some('.') = state.peek() {
368 state.advance(1);
369 if let Some('.') = state.peek() {
370 state.advance(1);
371 LuaSyntaxKind::DotDotDot
372 }
373 else {
374 LuaSyntaxKind::DotDot
375 }
376 }
377 else {
378 LuaSyntaxKind::Dot
379 }
380 }
381 ':' => {
382 state.advance(1);
383 if let Some(':') = state.peek() {
384 state.advance(1);
385 LuaSyntaxKind::ColonColon
386 }
387 else {
388 LuaSyntaxKind::Colon
389 }
390 }
391 '/' => {
392 state.advance(1);
393 if let Some('/') = state.peek() {
394 state.advance(1);
395 LuaSyntaxKind::SlashSlash
396 }
397 else {
398 LuaSyntaxKind::Slash
399 }
400 }
401 '+' => {
402 state.advance(1);
403 LuaSyntaxKind::Plus
404 }
405 '-' => {
406 state.advance(1);
407 LuaSyntaxKind::Minus
408 }
409 '*' => {
410 state.advance(1);
411 LuaSyntaxKind::Star
412 }
413 '%' => {
414 state.advance(1);
415 LuaSyntaxKind::Percent
416 }
417 '^' => {
418 state.advance(1);
419 LuaSyntaxKind::Caret
420 }
421 '#' => {
422 state.advance(1);
423 LuaSyntaxKind::Hash
424 }
425 '&' => {
426 state.advance(1);
427 LuaSyntaxKind::Ampersand
428 }
429 '|' => {
430 state.advance(1);
431 LuaSyntaxKind::Pipe
432 }
433 '(' => {
434 state.advance(1);
435 LuaSyntaxKind::LeftParen
436 }
437 ')' => {
438 state.advance(1);
439 LuaSyntaxKind::RightParen
440 }
441 '{' => {
442 state.advance(1);
443 LuaSyntaxKind::LeftBrace
444 }
445 '}' => {
446 state.advance(1);
447 LuaSyntaxKind::RightBrace
448 }
449 '[' => {
450 state.advance(1);
451 LuaSyntaxKind::LeftBracket
452 }
453 ']' => {
454 state.advance(1);
455 LuaSyntaxKind::RightBracket
456 }
457 ';' => {
458 state.advance(1);
459 LuaSyntaxKind::Semicolon
460 }
461 ',' => {
462 state.advance(1);
463 LuaSyntaxKind::Comma
464 }
465 _ => return false,
466 };
467
468 state.add_token(token_kind, start_pos, state.get_position());
469 true
470 }
471 else {
472 false
473 }
474 }
475}
476
477impl<'config> Lexer<LuaLanguage> for LuaLexer<'config> {
478 fn lex_incremental(
479 &self,
480 source: impl Source,
481 _offset: usize,
482 _cache: IncrementalCache<LuaLanguage>,
483 ) -> LexOutput<LuaLanguage> {
484 let mut state = LexerState::new_with_cache(source, _offset, _cache);
485 let result = self.run(&mut state);
486 state.finish(result)
487 }
488}
489
490impl<'config> LuaLexer<'config> {
491 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
492 loop {
493 if self.skip_whitespace(state) {
495 continue;
496 }
497
498 if self.lex_newline(state) {
499 continue;
500 }
501
502 if self.lex_comment(state) {
503 continue;
504 }
505
506 if self.lex_string(state) {
507 continue;
508 }
509
510 if self.lex_number(state) {
511 continue;
512 }
513
514 if self.lex_identifier_or_keyword(state) {
515 continue;
516 }
517
518 if self.lex_operator_or_delimiter(state) {
519 continue;
520 }
521
522 if let Some(ch) = state.current() {
524 let start_pos = state.get_position();
526 state.advance(ch.len_utf8());
527 state.add_token(LuaSyntaxKind::Error, start_pos, state.get_position());
528 }
529 else {
530 break;
532 }
533 }
534
535 let eof_pos = state.get_position();
537 state.add_token(LuaSyntaxKind::Eof, eof_pos, eof_pos);
538
539 Ok(())
540 }
541}