1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::language::LuaLanguage;
8pub use crate::lexer::token_type::LuaTokenType;
9use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
10
/// Shorthand for the oak_core lexer state specialised to the Lua language.
type State<'a, S> = LexerState<'a, S, LuaLanguage>;
12
/// Hand-written lexer for Lua source text.
///
/// Borrows the language configuration for `'config`; the configuration is
/// not read anywhere in this file yet (hence the leading-underscore field),
/// it only ties the lexer's lifetime to the `LuaLanguage` instance.
#[derive(Clone)]
pub struct LuaLexer<'config> {
    // Retained for future configuration-driven lexing; currently unused.
    _config: &'config LuaLanguage,
}
18
impl<'config> LuaLexer<'config> {
    /// Builds a lexer that borrows the given language configuration.
    pub fn new(config: &'config LuaLanguage) -> Self {
        Self { _config: config }
    }

    /// Top-level scan loop.
    ///
    /// Tries each sub-lexer in priority order (whitespace, newline, comment,
    /// string, number, identifier/keyword, operator/delimiter) and restarts
    /// the loop as soon as one produced a token.  Any character that no
    /// sub-lexer claims is consumed as a one-character `Error` token, so the
    /// loop always makes progress.
    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
        while state.not_at_end() {
            // Position at the start of this iteration; used below to force
            // progress if a sub-lexer reported `true` without consuming.
            let safe_point = state.get_position();

            if self.skip_whitespace(state) {
                continue;
            }

            if self.lex_newline(state) {
                continue;
            }

            // `--` comments must be tried before operators so they are not
            // split into two `Minus` tokens.
            if self.lex_comment(state) {
                continue;
            }

            // `[[` long strings must be tried before the `[` delimiter.
            if self.lex_string(state) {
                continue;
            }

            if self.lex_number(state) {
                continue;
            }

            if self.lex_identifier_or_keyword(state) {
                continue;
            }

            if self.lex_operator_or_delimiter(state) {
                continue;
            }

            // Fallback: emit the unrecognised character as an `Error` token.
            let start_pos = state.get_position();
            // NOTE(review): reads the cursor character with `peek()`, while
            // lex_comment/lex_string/lex_number read it with `current()` —
            // confirm which oak_core accessor means "character at cursor".
            if let Some(ch) = state.peek() {
                state.advance(ch.len_utf8());
                state.add_token(LuaTokenType::Error, start_pos, state.get_position())
            }

            // Safety net against infinite loops: force the cursor forward if
            // nothing above moved it.
            state.advance_if_dead_lock(safe_point)
        }

        Ok(())
    }

    /// Consumes a run of spaces and tabs into a single `Whitespace` token.
    /// Returns `true` when at least one character was consumed.
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        // NOTE(review): uses `peek()` as the cursor character here, the
        // opposite convention from lex_comment/lex_string below.
        while let Some(ch) = state.peek() {
            if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
        }

        if state.get_position() > start_pos {
            state.add_token(LuaTokenType::Whitespace, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

    /// Lexes a line break — `\n`, a lone `\r`, or the `\r\n` pair — as one
    /// `Newline` token.
    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('\n') = state.peek() {
            state.advance(1);
            state.add_token(LuaTokenType::Newline, start_pos, state.get_position());
            true
        }
        else if let Some('\r') = state.peek() {
            state.advance(1);
            // Fold a trailing '\n' into the same token (CRLF sequence).
            if let Some('\n') = state.peek() {
                state.advance(1)
            }
            state.add_token(LuaTokenType::Newline, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

    /// Lexes a `--` comment.
    ///
    /// `--[[ ... ]]` is treated as a block comment running to the first `]]`
    /// (or to end of input if unterminated); any other `--` comment runs to
    /// the end of the line (the line break itself is not consumed).
    /// Levelled long brackets (`--[=[ ... ]=]`) are NOT recognised and fall
    /// back to line-comment handling — TODO confirm that is intended.
    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        // Here `current()` is read as the cursor character and `peek()` as
        // the one after it — the opposite convention from lex_newline above;
        // see the NOTE(review) in `run`.
        if let Some('-') = state.current() {
            if let Some('-') = state.peek() {
                // Consume the `--` prefix.
                state.advance(1);
                state.advance(1);
                if let Some('[') = state.current() {
                    if let Some('[') = state.peek() {
                        // Block comment: consume `[[`, then scan to `]]`.
                        state.advance(1);
                        state.advance(1);
                        while let Some(ch) = state.current() {
                            if ch == ']' {
                                if let Some(']') = state.peek() {
                                    state.advance(1);
                                    state.advance(1);
                                    break;
                                }
                            }
                            state.advance(ch.len_utf8())
                        }
                    }
                    else {
                        // `--[` without a second `[`: plain line comment.
                        while let Some(ch) = state.current() {
                            if ch == '\n' || ch == '\r' {
                                break;
                            }
                            state.advance(ch.len_utf8())
                        }
                    }
                }
                else {
                    // Line comment: everything up to (not including) the
                    // line break.
                    while let Some(ch) = state.current() {
                        if ch == '\n' || ch == '\r' {
                            break;
                        }
                        state.advance(ch.len_utf8())
                    }
                }

                state.add_token(LuaTokenType::Comment, start_pos, state.get_position());
                true
            }
            else {
                false
            }
        }
        else {
            false
        }
    }

    /// Lexes a quoted string (`"..."` or `'...'`) or a level-0 long string
    /// (`[[ ... ]]`).
    ///
    /// Quoted strings honour backslash escapes; a raw line break ends the
    /// token early without being consumed (unterminated string).  Long
    /// strings run to the first `]]` or end of input; levelled brackets
    /// (`[=[`) are not recognised and yield `false` so `[` falls through to
    /// the delimiter lexer.
    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(quote_char) = state.current() {
            if quote_char == '"' || quote_char == '\'' {
                // Consume the opening quote and scan for the matching close.
                state.advance(1);
                let mut escaped = false;
                while let Some(ch) = state.current() {
                    if escaped {
                        // Character after a backslash is taken verbatim
                        // (including a line break — TODO confirm intended).
                        escaped = false;
                        state.advance(ch.len_utf8())
                    }
                    else if ch == '\\' {
                        escaped = true;
                        state.advance(1)
                    }
                    else if ch == quote_char {
                        // Closing quote ends the token.
                        state.advance(1);
                        break;
                    }
                    else if ch == '\n' || ch == '\r' {
                        // Unterminated string: stop before the line break.
                        break;
                    }
                    else {
                        state.advance(ch.len_utf8())
                    }
                }

                state.add_token(LuaTokenType::String, start_pos, state.get_position());
                true
            }
            else if quote_char == '[' {
                if let Some('[') = state.peek() {
                    // Long string: consume `[[`, then scan to `]]`.
                    state.advance(1);
                    state.advance(1);
                    while let Some(ch) = state.current() {
                        if ch == ']' {
                            if let Some(']') = state.peek() {
                                state.advance(1);
                                state.advance(1);
                                break;
                            }
                        }
                        state.advance(ch.len_utf8())
                    }

                    state.add_token(LuaTokenType::String, start_pos, state.get_position());
                    true
                }
                else {
                    // Single `[`: not a string; left for the delimiter lexer.
                    false
                }
            }
            else {
                false
            }
        }
        else {
            false
        }
    }

    /// Lexes a numeric literal: `0x`/`0X` hexadecimal integers, or decimal
    /// numbers with an optional fraction (`.`) and exponent (`e`/`E` with an
    /// optional sign).
    ///
    /// NOTE(review): a bare `0x` with no hex digits still emits a `Number`
    /// token, and the first character must be an ASCII digit (so `.5` is
    /// not lexed as a number here) — confirm both are intended.
    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.current() {
            if ch.is_ascii_digit() {
                if ch == '0' {
                    if let Some(next_ch) = state.peek() {
                        if next_ch == 'x' || next_ch == 'X' {
                            // Hexadecimal literal: consume `0x`, then digits.
                            state.advance(1);
                            state.advance(1);
                            while let Some(hex_ch) = state.current() {
                                if hex_ch.is_ascii_hexdigit() { state.advance(1) } else { break }
                            }

                            state.add_token(LuaTokenType::Number, start_pos, state.get_position());
                            return true;
                        }
                    }
                }

                // Decimal literal: at most one dot, at most one exponent,
                // and no dot once the exponent has started.
                let mut has_dot = false;
                let mut has_exp = false;

                while let Some(num_ch) = state.current() {
                    if num_ch.is_ascii_digit() {
                        state.advance(1)
                    }
                    else if num_ch == '.' && !has_dot && !has_exp {
                        has_dot = true;
                        state.advance(1)
                    }
                    else if (num_ch == 'e' || num_ch == 'E') && !has_exp {
                        has_exp = true;
                        state.advance(1);

                        // Optional exponent sign directly after `e`/`E`.
                        if let Some(sign_ch) = state.current() {
                            if sign_ch == '+' || sign_ch == '-' {
                                state.advance(1)
                            }
                        }
                    }
                    else {
                        break;
                    }
                }

                state.add_token(LuaTokenType::Number, start_pos, state.get_position());
                true
            }
            else {
                false
            }
        }
        else {
            false
        }
    }

    /// Lexes an identifier (`[A-Za-z_][A-Za-z0-9_]*`) and classifies it as a
    /// keyword token when the text matches a reserved word.  Non-ASCII
    /// identifier characters are not accepted.
    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        if let Some(ch) = state.current() {
            if ch.is_ascii_alphabetic() || ch == '_' {
                // take_while consumes the whole identifier and returns its
                // source range; the text is then looked up as a keyword.
                let range = state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
                let text = state.get_text_in(range.clone().into());
                let token_kind = self.keyword_or_identifier(&text);
                state.add_token(token_kind, range.start, range.end);
                true
            }
            else {
                false
            }
        }
        else {
            false
        }
    }

    /// Maps reserved words to their keyword token; everything else is an
    /// `Identifier`.  The list matches Lua's 5.2+ reserved-word set
    /// (includes `goto`).
    fn keyword_or_identifier(&self, text: &str) -> LuaTokenType {
        match text {
            "and" => LuaTokenType::And,
            "break" => LuaTokenType::Break,
            "do" => LuaTokenType::Do,
            "else" => LuaTokenType::Else,
            "elseif" => LuaTokenType::Elseif,
            "end" => LuaTokenType::End,
            "false" => LuaTokenType::False,
            "for" => LuaTokenType::For,
            "function" => LuaTokenType::Function,
            "goto" => LuaTokenType::Goto,
            "if" => LuaTokenType::If,
            "in" => LuaTokenType::In,
            "local" => LuaTokenType::Local,
            "nil" => LuaTokenType::Nil,
            "not" => LuaTokenType::Not,
            "or" => LuaTokenType::Or,
            "repeat" => LuaTokenType::Repeat,
            "return" => LuaTokenType::Return,
            "then" => LuaTokenType::Then,
            "true" => LuaTokenType::True,
            "until" => LuaTokenType::Until,
            "while" => LuaTokenType::While,
            _ => LuaTokenType::Identifier,
        }
    }

    /// Lexes operators and delimiters with maximal munch: multi-character
    /// forms (`==`, `~=`, `<=`, `<<`, `>=`, `>>`, `..`, `...`, `::`, `//`)
    /// are preferred over their one-character prefixes.
    ///
    /// NOTE(review): the first character is read with `peek()` and, after
    /// `advance(1)`, the lookahead is also read with `peek()` — this only
    /// works if `peek()` returns the character at the cursor, which
    /// contradicts how `lex_comment`/`lex_string` pair `current()` with
    /// `peek()`.  Confirm the oak_core accessor semantics.
    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            let token_kind = match ch {
                '=' => {
                    state.advance(1);
                    if let Some('=') = state.peek() {
                        state.advance(1);
                        LuaTokenType::EqEq
                    }
                    else {
                        LuaTokenType::Eq
                    }
                }
                '~' => {
                    state.advance(1);
                    if let Some('=') = state.peek() {
                        state.advance(1);
                        LuaTokenType::TildeEq
                    }
                    else {
                        LuaTokenType::Tilde
                    }
                }
                '<' => {
                    state.advance(1);
                    if let Some('=') = state.peek() {
                        state.advance(1);
                        LuaTokenType::LtEq
                    }
                    else if let Some('<') = state.peek() {
                        state.advance(1);
                        LuaTokenType::LtLt
                    }
                    else {
                        LuaTokenType::Lt
                    }
                }
                '>' => {
                    state.advance(1);
                    if let Some('=') = state.peek() {
                        state.advance(1);
                        LuaTokenType::GtEq
                    }
                    else if let Some('>') = state.peek() {
                        state.advance(1);
                        LuaTokenType::GtGt
                    }
                    else {
                        LuaTokenType::Gt
                    }
                }
                '.' => {
                    state.advance(1);
                    if let Some('.') = state.peek() {
                        state.advance(1);
                        if let Some('.') = state.peek() {
                            state.advance(1);
                            LuaTokenType::DotDotDot
                        }
                        else {
                            LuaTokenType::DotDot
                        }
                    }
                    else {
                        LuaTokenType::Dot
                    }
                }
                ':' => {
                    state.advance(1);
                    if let Some(':') = state.peek() {
                        state.advance(1);
                        LuaTokenType::ColonColon
                    }
                    else {
                        LuaTokenType::Colon
                    }
                }
                '/' => {
                    state.advance(1);
                    if let Some('/') = state.peek() {
                        state.advance(1);
                        LuaTokenType::SlashSlash
                    }
                    else {
                        LuaTokenType::Slash
                    }
                }
                '+' => {
                    state.advance(1);
                    LuaTokenType::Plus
                }
                '-' => {
                    state.advance(1);
                    LuaTokenType::Minus
                }
                '*' => {
                    state.advance(1);
                    LuaTokenType::Star
                }
                '%' => {
                    state.advance(1);
                    LuaTokenType::Percent
                }
                '^' => {
                    state.advance(1);
                    LuaTokenType::Caret
                }
                '#' => {
                    state.advance(1);
                    LuaTokenType::Hash
                }
                '&' => {
                    state.advance(1);
                    LuaTokenType::Ampersand
                }
                '|' => {
                    state.advance(1);
                    LuaTokenType::Pipe
                }
                '(' => {
                    state.advance(1);
                    LuaTokenType::LeftParen
                }
                ')' => {
                    state.advance(1);
                    LuaTokenType::RightParen
                }
                '{' => {
                    state.advance(1);
                    LuaTokenType::LeftBrace
                }
                '}' => {
                    state.advance(1);
                    LuaTokenType::RightBrace
                }
                '[' => {
                    state.advance(1);
                    LuaTokenType::LeftBracket
                }
                ']' => {
                    state.advance(1);
                    LuaTokenType::RightBracket
                }
                ';' => {
                    state.advance(1);
                    LuaTokenType::Semicolon
                }
                ',' => {
                    state.advance(1);
                    LuaTokenType::Comma
                }
                _ => return false,
            };

            state.add_token(token_kind, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }
}
515
516impl<'config> Lexer<LuaLanguage> for LuaLexer<'config> {
517 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<LuaLanguage>) -> LexOutput<LuaLanguage> {
518 let mut state = State::new_with_cache(source, 0, cache);
519 let result = self.run(&mut state);
520 if result.is_ok() {
521 state.add_eof()
522 }
523 state.finish_with_cache(result, cache)
524 }
525}