1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::DLanguage, lexer::token_type::DTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, TextEdit, lexer::LexOutput, source::Source};
6
/// Shorthand for the framework lexer state specialised to the D language.
pub(crate) type State<'a, S> = LexerState<'a, S, DLanguage>;
8
/// Lexer for the D programming language.
///
/// Borrows its [`DLanguage`] configuration for the `'config` lifetime.
#[derive(Clone)]
pub struct DLexer<'config> {
    // Language configuration. Not read by any routine in this file yet;
    // kept so behaviour can become configurable without an API break.
    config: &'config DLanguage,
}
14
impl<'config> Lexer<DLanguage> for DLexer<'config> {
    /// Tokenizes `source` from scratch and finalises the result through
    /// `cache`. The `_edits` hint is currently ignored, i.e. there is no
    /// incremental relexing — every call scans the whole source.
    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DLanguage>) -> LexOutput<DLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        // Only a successful scan is capped with an EOF token; a failed run
        // surfaces its error through `finish_with_cache` instead.
        if result.is_ok() {
            state.add_eof()
        }
        state.finish_with_cache(result, cache)
    }
}
25
26impl<'config> DLexer<'config> {
27 pub fn new(config: &'config DLanguage) -> Self {
29 Self { config }
30 }
31
32 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
33 while state.not_at_end() {
34 let start_pos = state.get_position();
35
36 if self.skip_whitespace(state) {
38 continue;
39 }
40
41 if self.lex_newline(state) {
42 continue;
43 }
44
45 if self.lex_line_comment(state) {
46 continue;
47 }
48
49 if self.lex_block_comment(state) {
50 continue;
51 }
52
53 if self.lex_nested_comment(state) {
54 continue;
55 }
56
57 if self.lex_identifier_or_keyword(state) {
58 continue;
59 }
60
61 if self.lex_number(state) {
62 continue;
63 }
64
65 if self.lex_string(state) {
66 continue;
67 }
68
69 if self.lex_character(state) {
70 continue;
71 }
72
73 if self.lex_operator(state) {
74 continue;
75 }
76
77 if self.lex_delimiter(state) {
78 continue;
79 }
80
81 state.advance_if_dead_lock(start_pos);
83 if state.get_position() > start_pos {
84 state.add_token(DTokenType::Error, start_pos, state.get_position())
85 }
86 }
87 Ok(())
88 }
89
90 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
91 if let Some(ch) = state.peek() {
92 if ch.is_whitespace() && ch != '\n' && ch != '\r' {
93 let start_pos = state.get_position();
94 while let Some(ch) = state.peek() {
95 if !ch.is_whitespace() || ch == '\n' || ch == '\r' {
96 break;
97 }
98 state.advance(ch.len_utf8())
99 }
100 state.add_token(DTokenType::Whitespace, start_pos, state.get_position());
101 return true;
102 }
103 }
104 false
105 }
106
107 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
108 if let Some(ch) = state.peek() {
109 if ch == '\n' || ch == '\r' {
110 let start_pos = state.get_position();
111 if ch == '\r' {
112 state.advance(1);
113 if state.peek() == Some('\n') {
114 state.advance(1)
115 }
116 }
117 else {
118 state.advance(1)
119 }
120 state.add_token(DTokenType::Newline, start_pos, state.get_position());
121 return true;
122 }
123 }
124 false
125 }
126
    /// Lexes an identifier (`[A-Za-z_][A-Za-z0-9_]*`, Unicode letters allowed
    /// via `char::is_alphabetic`/`is_alphanumeric`), then classifies it as a
    /// keyword, a built-in type name, or a plain `Identifier`.
    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        if let Some(ch) = state.peek() {
            if ch.is_alphabetic() || ch == '_' {
                let start_pos = state.get_position();
                while let Some(ch) = state.peek() {
                    if ch.is_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
                }
                let end_pos = state.get_position();
                let text = state.get_text_in((start_pos..end_pos).into());

                // Keyword table: an exact-match lookup on the scanned text.
                let kind = match text.as_ref() {
                    // Modules and visibility.
                    "module" => DTokenType::ModuleKeyword,
                    "import" => DTokenType::ImportKeyword,
                    "public" => DTokenType::PublicKeyword,
                    "private" => DTokenType::PrivateKeyword,
                    "protected" => DTokenType::ProtectedKeyword,
                    "package" => DTokenType::PackageKeyword,
                    "export" => DTokenType::ExportKeyword,
                    // Storage classes and attributes.
                    "static" => DTokenType::StaticKeyword,
                    "final" => DTokenType::FinalKeyword,
                    "abstract" => DTokenType::AbstractKeyword,
                    "override" => DTokenType::OverrideKeyword,
                    "synchronized" => DTokenType::SynchronizedKeyword,
                    "const" => DTokenType::ConstKeyword,
                    "immutable" => DTokenType::ImmutableKeyword,
                    "inout" => DTokenType::InoutKeyword,
                    "shared" => DTokenType::SharedKeyword,
                    // Aggregate and callable declarations.
                    "class" => DTokenType::ClassKeyword,
                    "struct" => DTokenType::StructKeyword,
                    "interface" => DTokenType::InterfaceKeyword,
                    "union" => DTokenType::UnionKeyword,
                    "enum" => DTokenType::EnumKeyword,
                    "function" => DTokenType::FunctionKeyword,
                    "delegate" => DTokenType::DelegateKeyword,
                    // Control flow.
                    "if" => DTokenType::IfKeyword,
                    "else" => DTokenType::ElseKeyword,
                    "while" => DTokenType::WhileKeyword,
                    "for" => DTokenType::ForKeyword,
                    "foreach" => DTokenType::ForeachKeyword,
                    "do" => DTokenType::DoKeyword,
                    "switch" => DTokenType::SwitchKeyword,
                    "case" => DTokenType::CaseKeyword,
                    "default" => DTokenType::DefaultKeyword,
                    "break" => DTokenType::BreakKeyword,
                    "continue" => DTokenType::ContinueKeyword,
                    "return" => DTokenType::ReturnKeyword,
                    "goto" => DTokenType::GotoKeyword,
                    // Exceptions and scope guards.
                    "try" => DTokenType::TryKeyword,
                    "catch" => DTokenType::CatchKeyword,
                    "finally" => DTokenType::FinallyKeyword,
                    "throw" => DTokenType::ThrowKeyword,
                    "scope" => DTokenType::ScopeKeyword,
                    "with" => DTokenType::WithKeyword,
                    // Metaprogramming and introspection.
                    "asm" => DTokenType::AsmKeyword,
                    "mixin" => DTokenType::MixinKeyword,
                    "template" => DTokenType::TemplateKeyword,
                    "alias" => DTokenType::AliasKeyword,
                    "typeof" => DTokenType::TypeofKeyword,
                    "typeid" => DTokenType::TypeidKeyword,
                    "is" => DTokenType::IsKeyword,
                    // Parameter storage classes.
                    "in" => DTokenType::InKeyword,
                    "out" => DTokenType::OutKeyword,
                    "ref" => DTokenType::RefKeyword,
                    "lazy" => DTokenType::LazyKeyword,
                    "auto" => DTokenType::AutoKeyword,
                    // Linkage, pragmas and conditional compilation.
                    "extern" => DTokenType::ExternKeyword,
                    "align" => DTokenType::AlignKeyword,
                    "pragma" => DTokenType::PragmaKeyword,
                    "debug" => DTokenType::DebugKeyword,
                    "version" => DTokenType::VersionKeyword,
                    "unittest" => DTokenType::UnitTestKeyword,
                    "invariant" => DTokenType::InvariantKeyword,
                    "body" => DTokenType::BodyKeyword,
                    // Object lifecycle and literals.
                    "new" => DTokenType::NewKeyword,
                    "delete" => DTokenType::DeleteKeyword,
                    "this" => DTokenType::ThisKeyword,
                    "super" => DTokenType::SuperKeyword,
                    "null" => DTokenType::NullKeyword,
                    "true" => DTokenType::TrueKeyword,
                    "false" => DTokenType::FalseKeyword,
                    "cast" => DTokenType::CastKeyword,
                    // Built-in basic types.
                    "void" => DTokenType::VoidType,
                    "bool" => DTokenType::BoolType,
                    "byte" => DTokenType::ByteType,
                    "ubyte" => DTokenType::UbyteType,
                    "short" => DTokenType::ShortType,
                    "ushort" => DTokenType::UshortType,
                    "int" => DTokenType::IntType,
                    "uint" => DTokenType::UintType,
                    "long" => DTokenType::LongType,
                    "ulong" => DTokenType::UlongType,
                    "cent" => DTokenType::CentType,
                    "ucent" => DTokenType::UcentType,
                    "float" => DTokenType::FloatType,
                    "double" => DTokenType::DoubleType,
                    "real" => DTokenType::RealType,
                    "ifloat" => DTokenType::IfloatType,
                    "idouble" => DTokenType::IdoubleType,
                    "ireal" => DTokenType::IrealType,
                    "cfloat" => DTokenType::CfloatType,
                    "cdouble" => DTokenType::CdoubleType,
                    "creal" => DTokenType::CrealType,
                    "char" => DTokenType::CharType,
                    "wchar" => DTokenType::WcharType,
                    "dchar" => DTokenType::DcharType,
                    "string" => DTokenType::StringType,
                    "wstring" => DTokenType::WstringType,
                    "dstring" => DTokenType::DstringType,
                    // Function attributes (including @-less spellings).
                    "typedef" => DTokenType::TypedefKeyword,
                    "pure" => DTokenType::PureKeyword,
                    "nothrow" => DTokenType::NothrowKeyword,
                    "safe" => DTokenType::SafeKeyword,
                    "trusted" => DTokenType::TrustedKeyword,
                    "system" => DTokenType::SystemKeyword,
                    "nogc" => DTokenType::NogcKeyword,
                    "property" => DTokenType::PropertyKeyword,
                    "disable" => DTokenType::DisableKeyword,
                    "deprecated" => DTokenType::DeprecatedKeyword,
                    // Anything else is a user identifier.
                    _ => DTokenType::Identifier,
                };

                state.add_token(kind, start_pos, end_pos);
                return true;
            }
        }
        false
    }
254
255 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
256 if let Some(ch) = state.peek() {
257 if ch.is_ascii_digit() {
258 let start_pos = state.get_position();
259
260 while let Some(ch) = state.peek() {
262 if ch.is_ascii_digit() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
263 }
264
265 if let Some('.') = state.peek() {
267 state.advance(1);
268 while let Some(ch) = state.peek() {
269 if ch.is_ascii_digit() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
270 }
271 }
272
273 if let Some(ch) = state.peek() {
275 if ch == 'e' || ch == 'E' {
276 state.advance(1);
277 if let Some(ch) = state.peek() {
278 if ch == '+' || ch == '-' {
279 state.advance(1)
280 }
281 }
282 while let Some(ch) = state.peek() {
283 if ch.is_ascii_digit() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
284 }
285 }
286 }
287
288 if let Some(ch) = state.peek() {
290 if ch == 'f' || ch == 'F' || ch == 'L' || ch == 'u' || ch == 'U' {
291 state.advance(1)
292 }
293 }
294
295 state.add_token(DTokenType::IntegerLiteral, start_pos, state.get_position());
296 return true;
297 }
298 }
299 false
300 }
301
302 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
303 if let Some(ch) = state.peek() {
304 if ch == '"' || ch == '\'' {
305 let start_pos = state.get_position();
306 let quote = ch;
307 state.advance(1); while let Some(ch) = state.peek() {
310 if ch == quote {
311 state.advance(1); break;
313 }
314 else if ch == '\\' {
315 state.advance(1); if state.peek().is_some() {
317 state.advance(1); }
319 }
320 else {
321 state.advance(ch.len_utf8())
322 }
323 }
324
325 state.add_token(DTokenType::StringLiteral, start_pos, state.get_position());
326 return true;
327 }
328 }
329 false
330 }
331
332 fn lex_character<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
333 if let Some('\'') = state.peek() {
334 let start_pos = state.get_position();
335 state.advance(1); if let Some(ch) = state.peek() {
338 if ch == '\\' {
339 state.advance(1); if state.peek().is_some() {
341 state.advance(1); }
343 }
344 else {
345 state.advance(ch.len_utf8())
346 }
347 }
348
349 if let Some('\'') = state.peek() {
350 state.advance(1); }
352
353 state.add_token(DTokenType::CharLiteral, start_pos, state.get_position());
354 return true;
355 }
356 false
357 }
358
359 fn lex_line_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
360 if let Some('/') = state.peek() {
361 if let Some('/') = state.peek_next_n(1) {
362 let start_pos = state.get_position();
363 state.advance(2);
364 while let Some(ch) = state.peek() {
365 if ch == '\n' || ch == '\r' {
366 break;
367 }
368 state.advance(ch.len_utf8())
369 }
370 state.add_token(DTokenType::LineComment, start_pos, state.get_position());
371 return true;
372 }
373 }
374 false
375 }
376
377 fn lex_block_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
378 if let Some('/') = state.peek() {
379 if let Some('*') = state.peek_next_n(1) {
380 let start_pos = state.get_position();
381 state.advance(2);
382 while let Some(ch) = state.peek() {
383 if ch == '*' {
384 state.advance(1);
385 if state.peek() == Some('/') {
386 state.advance(1);
387 break;
388 }
389 }
390 else {
391 state.advance(ch.len_utf8())
392 }
393 }
394 state.add_token(DTokenType::BlockComment, start_pos, state.get_position());
395 return true;
396 }
397 }
398 false
399 }
400
    /// Lexes a D nesting block comment `/+ … +/`, tracking nesting depth so
    /// inner `/+ … +/` pairs stay inside one token. An unterminated comment
    /// runs to end of input.
    fn lex_nested_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        if let Some('/') = state.peek() {
            if let Some('+') = state.peek_next_n(1) {
                let start_pos = state.get_position();
                state.advance(2);
                // Depth 1 accounts for the opener just consumed.
                let mut depth = 1;
                while let Some(ch) = state.peek() {
                    if ch == '/' {
                        state.advance(1);
                        // `/+` opens another nesting level.
                        if state.peek() == Some('+') {
                            state.advance(1);
                            depth += 1
                        }
                    }
                    else if ch == '+' {
                        state.advance(1);
                        // `+/` closes one level; reaching depth 0 ends the comment.
                        if state.peek() == Some('/') {
                            state.advance(1);
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                    }
                    else {
                        state.advance(ch.len_utf8())
                    }
                }
                state.add_token(DTokenType::NestedComment, start_pos, state.get_position());
                return true;
            }
        }
        false
    }
435
436 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
437 if let Some(ch) = state.peek() {
438 let start_pos = state.get_position();
439
440 match ch {
441 '+' => {
442 state.advance(1);
443 if let Some('=') = state.peek() {
444 state.advance(1);
445 state.add_token(DTokenType::PlusAssign, start_pos, state.get_position())
446 }
447 else if let Some('+') = state.peek() {
448 state.advance(1);
449 state.add_token(DTokenType::Increment, start_pos, state.get_position())
450 }
451 else {
452 state.add_token(DTokenType::Plus, start_pos, state.get_position())
453 }
454 return true;
455 }
456 '-' => {
457 state.advance(1);
458 if let Some('=') = state.peek() {
459 state.advance(1);
460 state.add_token(DTokenType::MinusAssign, start_pos, state.get_position())
461 }
462 else if let Some('-') = state.peek() {
463 state.advance(1);
464 state.add_token(DTokenType::Decrement, start_pos, state.get_position())
465 }
466 else {
467 state.add_token(DTokenType::Minus, start_pos, state.get_position())
468 }
469 return true;
470 }
471 '*' => {
472 state.advance(1);
473 if let Some('=') = state.peek() {
474 state.advance(1);
475 state.add_token(DTokenType::MultiplyAssign, start_pos, state.get_position())
476 }
477 else {
478 state.add_token(DTokenType::Multiply, start_pos, state.get_position())
479 }
480 return true;
481 }
482 '/' => {
483 return false;
485 }
486 '%' => {
487 state.advance(1);
488 if let Some('=') = state.peek() {
489 state.advance(1);
490 state.add_token(DTokenType::ModuloAssign, start_pos, state.get_position())
491 }
492 else {
493 state.add_token(DTokenType::Modulo, start_pos, state.get_position())
494 }
495 return true;
496 }
497 '&' => {
498 state.advance(1);
499 if let Some('&') = state.peek() {
500 state.advance(1);
501 state.add_token(DTokenType::LogicalAnd, start_pos, state.get_position())
502 }
503 else if let Some('=') = state.peek() {
504 state.advance(1);
505 state.add_token(DTokenType::BitwiseAndAssign, start_pos, state.get_position())
506 }
507 else {
508 state.add_token(DTokenType::BitwiseAnd, start_pos, state.get_position())
509 }
510 return true;
511 }
512 '|' => {
513 state.advance(1);
514 if let Some('|') = state.peek() {
515 state.advance(1);
516 state.add_token(DTokenType::LogicalOr, start_pos, state.get_position())
517 }
518 else if let Some('=') = state.peek() {
519 state.advance(1);
520 state.add_token(DTokenType::BitwiseOrAssign, start_pos, state.get_position())
521 }
522 else {
523 state.add_token(DTokenType::BitwiseOr, start_pos, state.get_position())
524 }
525 return true;
526 }
527 '^' => {
528 state.advance(1);
529 if let Some('=') = state.peek() {
530 state.advance(1);
531 state.add_token(DTokenType::BitwiseXorAssign, start_pos, state.get_position())
532 }
533 else {
534 state.add_token(DTokenType::BitwiseXor, start_pos, state.get_position())
535 }
536 return true;
537 }
538 '~' => {
539 state.advance(1);
540 if let Some('=') = state.peek() {
541 state.advance(1);
542 state.add_token(DTokenType::ConcatenateAssign, start_pos, state.get_position())
543 }
544 else {
545 state.add_token(DTokenType::BitwiseNot, start_pos, state.get_position())
546 }
547 return true;
548 }
549 '!' => {
550 state.advance(1);
551 if let Some('=') = state.peek() {
552 state.advance(1);
553 state.add_token(DTokenType::NotEqual, start_pos, state.get_position())
554 }
555 else {
556 state.add_token(DTokenType::Not, start_pos, state.get_position())
557 }
558 return true;
559 }
560 '<' => {
561 state.advance(1);
562 if let Some('<') = state.peek() {
563 state.advance(1);
564 if let Some('=') = state.peek() {
565 state.advance(1);
566 state.add_token(DTokenType::LeftShiftAssign, start_pos, state.get_position())
567 }
568 else {
569 state.add_token(DTokenType::LeftShift, start_pos, state.get_position())
570 }
571 }
572 else if let Some('=') = state.peek() {
573 state.advance(1);
574 state.add_token(DTokenType::LessEqual, start_pos, state.get_position())
575 }
576 else {
577 state.add_token(DTokenType::Less, start_pos, state.get_position())
578 }
579 return true;
580 }
581 '>' => {
582 state.advance(1);
583 if let Some('>') = state.peek() {
584 state.advance(1);
585 if let Some('=') = state.peek() {
586 state.advance(1);
587 state.add_token(DTokenType::RightShiftAssign, start_pos, state.get_position())
588 }
589 else {
590 state.add_token(DTokenType::RightShift, start_pos, state.get_position())
591 }
592 }
593 else if let Some('=') = state.peek() {
594 state.advance(1);
595 state.add_token(DTokenType::GreaterEqual, start_pos, state.get_position())
596 }
597 else {
598 state.add_token(DTokenType::Greater, start_pos, state.get_position())
599 }
600 return true;
601 }
602 '=' => {
603 state.advance(1);
604 if let Some('=') = state.peek() {
605 state.advance(1);
606 state.add_token(DTokenType::Equal, start_pos, state.get_position())
607 }
608 else {
609 state.add_token(DTokenType::Assign, start_pos, state.get_position())
610 }
611 return true;
612 }
613 _ => false,
614 }
615 }
616 else {
617 false
618 }
619 }
620
621 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
622 if let Some(ch) = state.peek() {
623 let start_pos = state.get_position();
624
625 match ch {
626 '(' => {
627 state.advance(1);
628 state.add_token(DTokenType::LeftParen, start_pos, state.get_position());
629 return true;
630 }
631 ')' => {
632 state.advance(1);
633 state.add_token(DTokenType::RightParen, start_pos, state.get_position());
634 return true;
635 }
636 '[' => {
637 state.advance(1);
638 state.add_token(DTokenType::LeftBracket, start_pos, state.get_position());
639 return true;
640 }
641 ']' => {
642 state.advance(1);
643 state.add_token(DTokenType::RightBracket, start_pos, state.get_position());
644 return true;
645 }
646 '{' => {
647 state.advance(1);
648 state.add_token(DTokenType::LeftBrace, start_pos, state.get_position());
649 return true;
650 }
651 '}' => {
652 state.advance(1);
653 state.add_token(DTokenType::RightBrace, start_pos, state.get_position());
654 return true;
655 }
656 ';' => {
657 state.advance(1);
658 state.add_token(DTokenType::Semicolon, start_pos, state.get_position());
659 return true;
660 }
661 ',' => {
662 state.advance(1);
663 state.add_token(DTokenType::Comma, start_pos, state.get_position());
664 return true;
665 }
666 '.' => {
667 state.advance(1);
668 state.add_token(DTokenType::Dot, start_pos, state.get_position());
669 return true;
670 }
671 ':' => {
672 state.advance(1);
673 state.add_token(DTokenType::Colon, start_pos, state.get_position());
674 return true;
675 }
676 '?' => {
677 state.advance(1);
678 state.add_token(DTokenType::Question, start_pos, state.get_position());
679 return true;
680 }
681 _ => false,
682 }
683 }
684 else {
685 false
686 }
687 }
688}