1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::language::LuaLanguage;
9pub use crate::lexer::token_type::LuaTokenType;
10use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
11
/// Convenience alias for the oak_core lexer state specialized to Lua.
pub(crate) type State<'a, S> = LexerState<'a, S, LuaLanguage>;
13
/// Hand-written Lua lexer; borrows a [`LuaLanguage`] configuration for its
/// whole lifetime and produces tokens via the [`Lexer`] trait impl below.
#[derive(Clone)]
pub struct LuaLexer<'config> {
    // Language configuration; stored for the lexer's lifetime.
    config: &'config LuaLanguage,
}
19
20impl<'config> LuaLexer<'config> {
21 pub fn new(config: &'config LuaLanguage) -> Self {
23 Self { config }
24 }
25
26 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
27 while state.not_at_end() {
28 let safe_point = state.get_position();
29
30 if self.skip_whitespace(state) {
32 continue;
33 }
34
35 if self.lex_newline(state) {
36 continue;
37 }
38
39 if self.lex_comment(state) {
40 continue;
41 }
42
43 if self.lex_string(state) {
44 continue;
45 }
46
47 if self.lex_number(state) {
48 continue;
49 }
50
51 if self.lex_identifier_or_keyword(state) {
52 continue;
53 }
54
55 if self.lex_operator_or_delimiter(state) {
56 continue;
57 }
58
59 let start_pos = state.get_position();
61 if let Some(ch) = state.peek() {
62 state.advance(ch.len_utf8());
63 state.add_token(LuaTokenType::Error, start_pos, state.get_position())
64 }
65
66 state.advance_if_dead_lock(safe_point)
67 }
68
69 Ok(())
70 }
71
72 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
74 let start_pos = state.get_position();
75
76 while let Some(ch) = state.peek() {
77 if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
78 }
79
80 if state.get_position() > start_pos {
81 state.add_token(LuaTokenType::Whitespace, start_pos, state.get_position());
82 true
83 }
84 else {
85 false
86 }
87 }
88
89 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
91 let start_pos = state.get_position();
92
93 if let Some('\n') = state.peek() {
94 state.advance(1);
95 state.add_token(LuaTokenType::Newline, start_pos, state.get_position());
96 true
97 }
98 else if let Some('\r') = state.peek() {
99 state.advance(1);
100 if let Some('\n') = state.peek() {
101 state.advance(1)
102 }
103 state.add_token(LuaTokenType::Newline, start_pos, state.get_position());
104 true
105 }
106 else {
107 false
108 }
109 }
110
    /// Lexes a Lua comment starting with `--`: either a line comment
    /// (consumed to end of line, leaving the newline for `lex_newline`) or a
    /// long-bracket block comment (`--[[ ... ]]`). Emits one `Comment`
    /// token; an unterminated block comment still yields a token to EOF.
    ///
    /// NOTE(review): this relies on `state.current()` and `state.peek()`
    /// addressing two consecutive characters (current and the one after) —
    /// confirm against the oak_core `LexerState` API, since other methods in
    /// this file (`skip_whitespace`, `lex_operator_or_delimiter`) use
    /// `peek()` alone for the current character.
    ///
    /// NOTE(review): leveled long brackets (`--[=[ ... ]=]`, Lua's level > 0
    /// form) are not recognized; they fall into the line-comment path.
    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('-') = state.current() {
            if let Some('-') = state.peek() {
                // Consume the leading "--", then decide line vs. block form.
                state.advance(1); state.advance(1); if let Some('[') = state.current() {
                    if let Some('[') = state.peek() {
                        // Block comment: scan until the closing "]]" or EOF.
                        state.advance(1); state.advance(1); while let Some(ch) = state.current() {
                            if ch == ']' {
                                if let Some(']') = state.peek() {
                                    state.advance(1); state.advance(1); break;
                                }
                            }
                            state.advance(ch.len_utf8())
                        }
                    }
                    else {
                        // Single '[' after "--": treat as a line comment.
                        while let Some(ch) = state.current() {
                            if ch == '\n' || ch == '\r' {
                                break;
                            }
                            state.advance(ch.len_utf8())
                        }
                    }
                }
                else {
                    // Line comment: consume everything up to the line break.
                    while let Some(ch) = state.current() {
                        if ch == '\n' || ch == '\r' {
                            break;
                        }
                        state.advance(ch.len_utf8())
                    }
                }

                state.add_token(LuaTokenType::Comment, start_pos, state.get_position());
                true
            }
            else {
                false
            }
        }
        else {
            false
        }
    }
169
    /// Lexes a Lua string literal: single- or double-quoted with backslash
    /// escapes, or a long-bracket string (`[[ ... ]]`). Emits one `String`
    /// token. An unterminated quoted string (line break or EOF before the
    /// closing quote) still emits a `String` token covering what was read.
    ///
    /// NOTE(review): like `lex_comment`, this assumes `current()`/`peek()`
    /// address two consecutive characters — confirm with the oak_core API.
    /// Leveled long brackets (`[=[ ... ]=]`) are not recognized; a lone '['
    /// is left for the delimiter lexer.
    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(quote_char) = state.current() {
            if quote_char == '"' || quote_char == '\'' {
                // Quoted string: track escape state so \" and \' don't close.
                state.advance(1); let mut escaped = false;
                while let Some(ch) = state.current() {
                    if escaped {
                        // Character after a backslash is consumed verbatim.
                        escaped = false;
                        state.advance(ch.len_utf8())
                    }
                    else if ch == '\\' {
                        escaped = true;
                        state.advance(1)
                    }
                    else if ch == quote_char {
                        // Matching unescaped quote terminates the literal.
                        state.advance(1); break;
                    }
                    else if ch == '\n' || ch == '\r' {
                        // Unterminated string: stop at end of line.
                        break;
                    }
                    else {
                        state.advance(ch.len_utf8())
                    }
                }

                state.add_token(LuaTokenType::String, start_pos, state.get_position());
                true
            }
            else if quote_char == '[' {
                if let Some('[') = state.peek() {
                    // Long-bracket string: scan until the closing "]]" or EOF.
                    state.advance(1); state.advance(1); while let Some(ch) = state.current() {
                        if ch == ']' {
                            if let Some(']') = state.peek() {
                                state.advance(1); state.advance(1); break;
                            }
                        }
                        state.advance(ch.len_utf8())
                    }

                    state.add_token(LuaTokenType::String, start_pos, state.get_position());
                    true
                }
                else {
                    false
                }
            }
            else {
                false
            }
        }
        else {
            false
        }
    }
237
    /// Lexes a numeric literal: a hexadecimal integer (`0x...`/`0X...`) or a
    /// decimal number with at most one fractional dot and one exponent part
    /// (`e`/`E` with an optional sign). Emits one `Number` token.
    ///
    /// NOTE(review): a bare "0x" with no hex digits, and an exponent marker
    /// with no following digits, are still accepted as `Number` here;
    /// hexadecimal floats (Lua 5.2+) are not recognized. Confirm whether the
    /// parser is expected to reject these malformed forms.
    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.current() {
            if ch.is_ascii_digit() {
                if ch == '0' {
                    // Possible hexadecimal literal: "0x"/"0X" prefix.
                    if let Some(next_ch) = state.peek() {
                        if next_ch == 'x' || next_ch == 'X' {
                            state.advance(1); state.advance(1); while let Some(hex_ch) = state.current() {
                                if hex_ch.is_ascii_hexdigit() { state.advance(1) } else { break }
                            }

                            state.add_token(LuaTokenType::Number, start_pos, state.get_position());
                            return true;
                        }
                    }
                }

                // Decimal literal: one '.' and one exponent section at most.
                let mut has_dot = false;
                let mut has_exp = false;

                while let Some(num_ch) = state.current() {
                    if num_ch.is_ascii_digit() {
                        state.advance(1)
                    }
                    else if num_ch == '.' && !has_dot && !has_exp {
                        has_dot = true;
                        state.advance(1)
                    }
                    else if (num_ch == 'e' || num_ch == 'E') && !has_exp {
                        has_exp = true;
                        state.advance(1);

                        // Optional sign directly after the exponent marker.
                        if let Some(sign_ch) = state.current() {
                            if sign_ch == '+' || sign_ch == '-' {
                                state.advance(1)
                            }
                        }
                    }
                    else {
                        break;
                    }
                }

                state.add_token(LuaTokenType::Number, start_pos, state.get_position());
                true
            }
            else {
                false
            }
        }
        else {
            false
        }
    }
301
302 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
304 if let Some(ch) = state.current() {
305 if ch.is_ascii_alphabetic() || ch == '_' {
306 let range = state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
307 let text = state.get_text_in(range.clone().into());
309 let token_kind = self.keyword_or_identifier(&text);
310 state.add_token(token_kind, range.start, range.end);
311 true
312 }
313 else {
314 false
315 }
316 }
317 else {
318 false
319 }
320 }
321
    /// Maps scanned identifier text to its keyword token, or to
    /// `Identifier` if the text is not one of Lua's reserved words.
    /// Covers the reserved-word set including `goto` (Lua 5.2+).
    fn keyword_or_identifier(&self, text: &str) -> LuaTokenType {
        match text {
            "and" => LuaTokenType::And,
            "break" => LuaTokenType::Break,
            "do" => LuaTokenType::Do,
            "else" => LuaTokenType::Else,
            "elseif" => LuaTokenType::Elseif,
            "end" => LuaTokenType::End,
            "false" => LuaTokenType::False,
            "for" => LuaTokenType::For,
            "function" => LuaTokenType::Function,
            "goto" => LuaTokenType::Goto,
            "if" => LuaTokenType::If,
            "in" => LuaTokenType::In,
            "local" => LuaTokenType::Local,
            "nil" => LuaTokenType::Nil,
            "not" => LuaTokenType::Not,
            "or" => LuaTokenType::Or,
            "repeat" => LuaTokenType::Repeat,
            "return" => LuaTokenType::Return,
            "then" => LuaTokenType::Then,
            "true" => LuaTokenType::True,
            "until" => LuaTokenType::Until,
            "while" => LuaTokenType::While,
            _ => LuaTokenType::Identifier,
        }
    }
350
    /// Lexes a one-, two-, or three-character operator or delimiter
    /// (`==`, `~=`, `<=`, `<<`, `>=`, `>>`, `..`, `...`, `::`, `//`, and the
    /// single-character set). Emits the matching token and returns `true`,
    /// or returns `false` without consuming input when the next character
    /// starts no known operator.
    ///
    /// NOTE(review): the first character is read via `peek()` here, while
    /// `lex_comment`/`lex_string` use `current()` for the same purpose —
    /// confirm which offset each oak_core accessor reads.
    ///
    /// NOTE(review): '-' is matched here as `Minus`; comment detection in
    /// `run` happens earlier, so a "--" sequence should never reach this
    /// method — verify against the sub-lexer ordering in `run`.
    fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            // Each arm advances past the characters it recognizes before
            // yielding the token kind; multi-char arms peek after the first
            // advance to decide between the short and long forms.
            let token_kind = match ch {
                '=' => {
                    state.advance(1);
                    if let Some('=') = state.peek() {
                        state.advance(1);
                        LuaTokenType::EqEq
                    }
                    else {
                        LuaTokenType::Eq
                    }
                }
                '~' => {
                    state.advance(1);
                    if let Some('=') = state.peek() {
                        state.advance(1);
                        LuaTokenType::TildeEq
                    }
                    else {
                        LuaTokenType::Tilde
                    }
                }
                '<' => {
                    state.advance(1);
                    if let Some('=') = state.peek() {
                        state.advance(1);
                        LuaTokenType::LtEq
                    }
                    else if let Some('<') = state.peek() {
                        state.advance(1);
                        LuaTokenType::LtLt
                    }
                    else {
                        LuaTokenType::Lt
                    }
                }
                '>' => {
                    state.advance(1);
                    if let Some('=') = state.peek() {
                        state.advance(1);
                        LuaTokenType::GtEq
                    }
                    else if let Some('>') = state.peek() {
                        state.advance(1);
                        LuaTokenType::GtGt
                    }
                    else {
                        LuaTokenType::Gt
                    }
                }
                '.' => {
                    // '.', '..' (concat), or '...' (vararg).
                    state.advance(1);
                    if let Some('.') = state.peek() {
                        state.advance(1);
                        if let Some('.') = state.peek() {
                            state.advance(1);
                            LuaTokenType::DotDotDot
                        }
                        else {
                            LuaTokenType::DotDot
                        }
                    }
                    else {
                        LuaTokenType::Dot
                    }
                }
                ':' => {
                    state.advance(1);
                    if let Some(':') = state.peek() {
                        state.advance(1);
                        LuaTokenType::ColonColon
                    }
                    else {
                        LuaTokenType::Colon
                    }
                }
                '/' => {
                    // '/' (divide) or '//' (floor divide).
                    state.advance(1);
                    if let Some('/') = state.peek() {
                        state.advance(1);
                        LuaTokenType::SlashSlash
                    }
                    else {
                        LuaTokenType::Slash
                    }
                }
                '+' => {
                    state.advance(1);
                    LuaTokenType::Plus
                }
                '-' => {
                    state.advance(1);
                    LuaTokenType::Minus
                }
                '*' => {
                    state.advance(1);
                    LuaTokenType::Star
                }
                '%' => {
                    state.advance(1);
                    LuaTokenType::Percent
                }
                '^' => {
                    state.advance(1);
                    LuaTokenType::Caret
                }
                '#' => {
                    state.advance(1);
                    LuaTokenType::Hash
                }
                '&' => {
                    state.advance(1);
                    LuaTokenType::Ampersand
                }
                '|' => {
                    state.advance(1);
                    LuaTokenType::Pipe
                }
                '(' => {
                    state.advance(1);
                    LuaTokenType::LeftParen
                }
                ')' => {
                    state.advance(1);
                    LuaTokenType::RightParen
                }
                '{' => {
                    state.advance(1);
                    LuaTokenType::LeftBrace
                }
                '}' => {
                    state.advance(1);
                    LuaTokenType::RightBrace
                }
                '[' => {
                    state.advance(1);
                    LuaTokenType::LeftBracket
                }
                ']' => {
                    state.advance(1);
                    LuaTokenType::RightBracket
                }
                ';' => {
                    state.advance(1);
                    LuaTokenType::Semicolon
                }
                ',' => {
                    state.advance(1);
                    LuaTokenType::Comma
                }
                // Unknown character: consume nothing, let the caller emit Error.
                _ => return false,
            };

            state.add_token(token_kind, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }
515}
516
impl<'config> Lexer<LuaLanguage> for LuaLexer<'config> {
    /// `Lexer` trait entry point: tokenizes `source` into a `LexOutput`,
    /// appending an EOF token only when the run succeeded, and finalizing
    /// through the provided cache.
    ///
    /// NOTE(review): `_edits` is ignored, so every call performs a full
    /// relex rather than an incremental one — confirm this is intentional.
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<LuaLanguage>) -> LexOutput<LuaLanguage> {
        let mut state = State::new_with_cache(source, 0, cache);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof()
        }
        state.finish_with_cache(result, cache)
    }
}