1use crate::{kind::LuaSyntaxKind, language::LuaLanguage};
6use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
7
8type State<'a, S> = LexerState<'a, S, LuaLanguage>;
9
10#[derive(Clone)]
12pub struct LuaLexer<'config> {
13 _config: &'config LuaLanguage,
14}
15
16impl<'config> LuaLexer<'config> {
17 pub fn new(config: &'config LuaLanguage) -> Self {
19 Self { _config: config }
20 }
21
22 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
23 while state.not_at_end() {
24 let safe_point = state.get_position();
25
26 if self.skip_whitespace(state) {
28 continue;
29 }
30
31 if self.lex_newline(state) {
32 continue;
33 }
34
35 if self.lex_comment(state) {
36 continue;
37 }
38
39 if self.lex_string(state) {
40 continue;
41 }
42
43 if self.lex_number(state) {
44 continue;
45 }
46
47 if self.lex_identifier_or_keyword(state) {
48 continue;
49 }
50
51 if self.lex_operator_or_delimiter(state) {
52 continue;
53 }
54
55 let start_pos = state.get_position();
57 if let Some(ch) = state.peek() {
58 state.advance(ch.len_utf8());
59 state.add_token(LuaSyntaxKind::Error, start_pos, state.get_position());
60 }
61
62 state.advance_if_dead_lock(safe_point);
63 }
64
65 Ok(())
66 }
67
68 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
70 let start_pos = state.get_position();
71
72 while let Some(ch) = state.peek() {
73 if ch == ' ' || ch == '\t' {
74 state.advance(ch.len_utf8());
75 }
76 else {
77 break;
78 }
79 }
80
81 if state.get_position() > start_pos {
82 state.add_token(LuaSyntaxKind::Whitespace, start_pos, state.get_position());
83 true
84 }
85 else {
86 false
87 }
88 }
89
90 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
92 let start_pos = state.get_position();
93
94 if let Some('\n') = state.peek() {
95 state.advance(1);
96 state.add_token(LuaSyntaxKind::Newline, start_pos, state.get_position());
97 true
98 }
99 else if let Some('\r') = state.peek() {
100 state.advance(1);
101 if let Some('\n') = state.peek() {
102 state.advance(1);
103 }
104 state.add_token(LuaSyntaxKind::Newline, start_pos, state.get_position());
105 true
106 }
107 else {
108 false
109 }
110 }
111
112 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
114 let start_pos = state.get_position();
115
116 if let Some('-') = state.current() {
117 if let Some('-') = state.peek() {
118 state.advance(1); state.advance(1); if let Some('[') = state.current() {
123 if let Some('[') = state.peek() {
124 state.advance(1); state.advance(1); while let Some(ch) = state.current() {
129 if ch == ']' {
130 if let Some(']') = state.peek() {
131 state.advance(1); state.advance(1); break;
134 }
135 }
136 state.advance(ch.len_utf8());
137 }
138 }
139 else {
140 while let Some(ch) = state.current() {
142 if ch == '\n' || ch == '\r' {
143 break;
144 }
145 state.advance(ch.len_utf8());
146 }
147 }
148 }
149 else {
150 while let Some(ch) = state.current() {
152 if ch == '\n' || ch == '\r' {
153 break;
154 }
155 state.advance(ch.len_utf8());
156 }
157 }
158
159 state.add_token(LuaSyntaxKind::Comment, start_pos, state.get_position());
160 true
161 }
162 else {
163 false
164 }
165 }
166 else {
167 false
168 }
169 }
170
171 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
173 let start_pos = state.get_position();
174
175 if let Some(quote_char) = state.current() {
176 if quote_char == '"' || quote_char == '\'' {
177 state.advance(1); let mut escaped = false;
180 while let Some(ch) = state.current() {
181 if escaped {
182 escaped = false;
183 state.advance(ch.len_utf8());
184 }
185 else if ch == '\\' {
186 escaped = true;
187 state.advance(1);
188 }
189 else if ch == quote_char {
190 state.advance(1); break;
192 }
193 else if ch == '\n' || ch == '\r' {
194 break;
196 }
197 else {
198 state.advance(ch.len_utf8());
199 }
200 }
201
202 state.add_token(LuaSyntaxKind::String, start_pos, state.get_position());
203 true
204 }
205 else if quote_char == '[' {
206 if let Some('[') = state.peek() {
208 state.advance(1); state.advance(1); while let Some(ch) = state.current() {
213 if ch == ']' {
214 if let Some(']') = state.peek() {
215 state.advance(1); state.advance(1); break;
218 }
219 }
220 state.advance(ch.len_utf8());
221 }
222
223 state.add_token(LuaSyntaxKind::String, start_pos, state.get_position());
224 true
225 }
226 else {
227 false
228 }
229 }
230 else {
231 false
232 }
233 }
234 else {
235 false
236 }
237 }
238
239 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
241 let start_pos = state.get_position();
242
243 if let Some(ch) = state.current() {
244 if ch.is_ascii_digit() {
245 if ch == '0' {
247 if let Some(next_ch) = state.peek() {
248 if next_ch == 'x' || next_ch == 'X' {
249 state.advance(1); state.advance(1); while let Some(hex_ch) = state.current() {
254 if hex_ch.is_ascii_hexdigit() {
255 state.advance(1);
256 }
257 else {
258 break;
259 }
260 }
261
262 state.add_token(LuaSyntaxKind::Number, start_pos, state.get_position());
263 return true;
264 }
265 }
266 }
267
268 let mut has_dot = false;
270 let mut has_exp = false;
271
272 while let Some(num_ch) = state.current() {
273 if num_ch.is_ascii_digit() {
274 state.advance(1);
275 }
276 else if num_ch == '.' && !has_dot && !has_exp {
277 has_dot = true;
278 state.advance(1);
279 }
280 else if (num_ch == 'e' || num_ch == 'E') && !has_exp {
281 has_exp = true;
282 state.advance(1);
283
284 if let Some(sign_ch) = state.current() {
286 if sign_ch == '+' || sign_ch == '-' {
287 state.advance(1);
288 }
289 }
290 }
291 else {
292 break;
293 }
294 }
295
296 state.add_token(LuaSyntaxKind::Number, start_pos, state.get_position());
297 true
298 }
299 else {
300 false
301 }
302 }
303 else {
304 false
305 }
306 }
307
308 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
310 if let Some(ch) = state.current() {
311 if ch.is_ascii_alphabetic() || ch == '_' {
312 let range = state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
313 let text = state.get_text_in(range.clone().into());
315 let token_kind = self.keyword_or_identifier(&text);
316 state.add_token(token_kind, range.start, range.end);
317 true
318 }
319 else {
320 false
321 }
322 }
323 else {
324 false
325 }
326 }
327
328 fn keyword_or_identifier(&self, text: &str) -> LuaSyntaxKind {
330 match text {
331 "and" => LuaSyntaxKind::And,
332 "break" => LuaSyntaxKind::Break,
333 "do" => LuaSyntaxKind::Do,
334 "else" => LuaSyntaxKind::Else,
335 "elseif" => LuaSyntaxKind::Elseif,
336 "end" => LuaSyntaxKind::End,
337 "false" => LuaSyntaxKind::False,
338 "for" => LuaSyntaxKind::For,
339 "function" => LuaSyntaxKind::Function,
340 "goto" => LuaSyntaxKind::Goto,
341 "if" => LuaSyntaxKind::If,
342 "in" => LuaSyntaxKind::In,
343 "local" => LuaSyntaxKind::Local,
344 "nil" => LuaSyntaxKind::Nil,
345 "not" => LuaSyntaxKind::Not,
346 "or" => LuaSyntaxKind::Or,
347 "repeat" => LuaSyntaxKind::Repeat,
348 "return" => LuaSyntaxKind::Return,
349 "then" => LuaSyntaxKind::Then,
350 "true" => LuaSyntaxKind::True,
351 "until" => LuaSyntaxKind::Until,
352 "while" => LuaSyntaxKind::While,
353 _ => LuaSyntaxKind::Identifier,
354 }
355 }
356
357 fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
359 let start_pos = state.get_position();
360
361 if let Some(ch) = state.peek() {
362 let token_kind = match ch {
363 '=' => {
364 state.advance(1);
365 if let Some('=') = state.peek() {
366 state.advance(1);
367 LuaSyntaxKind::EqEq
368 }
369 else {
370 LuaSyntaxKind::Eq
371 }
372 }
373 '~' => {
374 state.advance(1);
375 if let Some('=') = state.peek() {
376 state.advance(1);
377 LuaSyntaxKind::TildeEq
378 }
379 else {
380 LuaSyntaxKind::Tilde
381 }
382 }
383 '<' => {
384 state.advance(1);
385 if let Some('=') = state.peek() {
386 state.advance(1);
387 LuaSyntaxKind::LtEq
388 }
389 else if let Some('<') = state.peek() {
390 state.advance(1);
391 LuaSyntaxKind::LtLt
392 }
393 else {
394 LuaSyntaxKind::Lt
395 }
396 }
397 '>' => {
398 state.advance(1);
399 if let Some('=') = state.peek() {
400 state.advance(1);
401 LuaSyntaxKind::GtEq
402 }
403 else if let Some('>') = state.peek() {
404 state.advance(1);
405 LuaSyntaxKind::GtGt
406 }
407 else {
408 LuaSyntaxKind::Gt
409 }
410 }
411 '.' => {
412 state.advance(1);
413 if let Some('.') = state.peek() {
414 state.advance(1);
415 if let Some('.') = state.peek() {
416 state.advance(1);
417 LuaSyntaxKind::DotDotDot
418 }
419 else {
420 LuaSyntaxKind::DotDot
421 }
422 }
423 else {
424 LuaSyntaxKind::Dot
425 }
426 }
427 ':' => {
428 state.advance(1);
429 if let Some(':') = state.peek() {
430 state.advance(1);
431 LuaSyntaxKind::ColonColon
432 }
433 else {
434 LuaSyntaxKind::Colon
435 }
436 }
437 '/' => {
438 state.advance(1);
439 if let Some('/') = state.peek() {
440 state.advance(1);
441 LuaSyntaxKind::SlashSlash
442 }
443 else {
444 LuaSyntaxKind::Slash
445 }
446 }
447 '+' => {
448 state.advance(1);
449 LuaSyntaxKind::Plus
450 }
451 '-' => {
452 state.advance(1);
453 LuaSyntaxKind::Minus
454 }
455 '*' => {
456 state.advance(1);
457 LuaSyntaxKind::Star
458 }
459 '%' => {
460 state.advance(1);
461 LuaSyntaxKind::Percent
462 }
463 '^' => {
464 state.advance(1);
465 LuaSyntaxKind::Caret
466 }
467 '#' => {
468 state.advance(1);
469 LuaSyntaxKind::Hash
470 }
471 '&' => {
472 state.advance(1);
473 LuaSyntaxKind::Ampersand
474 }
475 '|' => {
476 state.advance(1);
477 LuaSyntaxKind::Pipe
478 }
479 '(' => {
480 state.advance(1);
481 LuaSyntaxKind::LeftParen
482 }
483 ')' => {
484 state.advance(1);
485 LuaSyntaxKind::RightParen
486 }
487 '{' => {
488 state.advance(1);
489 LuaSyntaxKind::LeftBrace
490 }
491 '}' => {
492 state.advance(1);
493 LuaSyntaxKind::RightBrace
494 }
495 '[' => {
496 state.advance(1);
497 LuaSyntaxKind::LeftBracket
498 }
499 ']' => {
500 state.advance(1);
501 LuaSyntaxKind::RightBracket
502 }
503 ';' => {
504 state.advance(1);
505 LuaSyntaxKind::Semicolon
506 }
507 ',' => {
508 state.advance(1);
509 LuaSyntaxKind::Comma
510 }
511 _ => return false,
512 };
513
514 state.add_token(token_kind, start_pos, state.get_position());
515 true
516 }
517 else {
518 false
519 }
520 }
521}
522
523impl<'config> Lexer<LuaLanguage> for LuaLexer<'config> {
524 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<LuaLanguage>) -> LexOutput<LuaLanguage> {
525 let mut state = State::new_with_cache(source, 0, cache);
526 let result = self.run(&mut state);
527 if result.is_ok() {
528 state.add_eof();
529 }
530 state.finish_with_cache(result, cache)
531 }
532}