1use crate::{kind::DSyntaxKind, language::DLanguage};
2use oak_core::{Lexer, LexerState, SourceText, lexer::LexOutput, source::Source};
3
4type State<'input> = LexerState<&'input SourceText, DLanguage>;
5
/// Lexer for D source text.
///
/// Borrows a [`DLanguage`] configuration for the lifetime `'config`.
pub struct DLexer<'config> {
    /// Language configuration supplied to [`DLexer::new`].
    // NOTE(review): none of the lexing methods visible in this file read this field — confirm
    // whether it is still needed.
    config: &'config DLanguage,
}
10
11impl<'config> Lexer<DLanguage> for DLexer<'config> {
12 fn lex(&self, source: impl Source) -> LexOutput<DLanguage> {
13 let source_text = SourceText::new(source.get_text_in((0..source.length()).into()));
14 let mut state = LexerState::new(&source_text);
15
16 while state.not_at_end() {
17 if self.skip_whitespace(&mut state) {
19 continue;
20 }
21
22 if self.lex_newline(&mut state) {
23 continue;
24 }
25
26 if self.lex_line_comment(&mut state) {
27 continue;
28 }
29
30 if self.lex_block_comment(&mut state) {
31 continue;
32 }
33
34 if self.lex_nested_comment(&mut state) {
35 continue;
36 }
37
38 if self.lex_identifier_or_keyword(&mut state, &source_text) {
39 continue;
40 }
41
42 if self.lex_number(&mut state) {
43 continue;
44 }
45
46 if self.lex_string(&mut state) {
47 continue;
48 }
49
50 if self.lex_character(&mut state) {
51 continue;
52 }
53
54 if self.lex_operator(&mut state) {
55 continue;
56 }
57
58 if self.lex_delimiter(&mut state) {
59 continue;
60 }
61
62 let start_pos = state.get_position();
64 if let Some(ch) = state.peek() {
65 state.advance(ch.len_utf8());
66 state.add_token(DSyntaxKind::Error, start_pos, state.get_position());
67 }
68 else {
69 break;
70 }
71 }
72
73 let eof_pos = state.get_position();
75 state.add_token(DSyntaxKind::Eof, eof_pos, eof_pos);
76
77 state.finish(Ok(()))
78 }
79
/// Incremental lexing entry point.
///
/// Both `_old_tree_len` and `_cache` are ignored: the whole `source` is re-lexed from
/// scratch by delegating to [`Self::lex`].
fn lex_incremental(
    &self,
    source: impl Source,
    _old_tree_len: usize,
    _cache: oak_core::IncrementalCache<DLanguage>,
) -> LexOutput<DLanguage> {
    // No reuse of previous results is attempted here.
    self.lex(source)
}
88}
89
90impl<'config> DLexer<'config> {
/// Creates a lexer that borrows `config` for as long as the lexer lives.
pub fn new(config: &'config DLanguage) -> Self {
    Self { config }
}
94
95 fn skip_whitespace(&self, state: &mut State) -> bool {
96 if let Some(ch) = state.peek() {
97 if ch.is_whitespace() && ch != '\n' && ch != '\r' {
98 let start_pos = state.get_position();
99 while let Some(ch) = state.peek() {
100 if !ch.is_whitespace() || ch == '\n' || ch == '\r' {
101 break;
102 }
103 state.advance(ch.len_utf8());
104 }
105 state.add_token(DSyntaxKind::Whitespace, start_pos, state.get_position());
106 return true;
107 }
108 }
109 false
110 }
111
112 fn lex_newline(&self, state: &mut State) -> bool {
113 if let Some(ch) = state.peek() {
114 if ch == '\n' || ch == '\r' {
115 let start_pos = state.get_position();
116 if ch == '\r' {
117 state.advance(1);
118 if let Some('\n') = state.peek() {
119 state.advance(1);
120 }
121 }
122 else {
123 state.advance(1);
124 }
125 state.add_token(DSyntaxKind::Newline, start_pos, state.get_position());
126 return true;
127 }
128 }
129 false
130 }
131
132 fn lex_identifier_or_keyword(&self, state: &mut State, source: &SourceText) -> bool {
133 if let Some(ch) = state.peek() {
134 if ch.is_alphabetic() || ch == '_' {
135 let start_pos = state.get_position();
136
137 state.advance(ch.len_utf8());
139
140 while let Some(ch) = state.peek() {
142 if ch.is_alphanumeric() || ch == '_' {
143 state.advance(ch.len_utf8());
144 }
145 else {
146 break;
147 }
148 }
149
150 let end_pos = state.get_position();
151 let text = source.get_text_in((start_pos..end_pos).into());
152
153 let kind = match text {
155 "module" => DSyntaxKind::ModuleKeyword,
156 "import" => DSyntaxKind::ImportKeyword,
157 "public" => DSyntaxKind::PublicKeyword,
158 "private" => DSyntaxKind::PrivateKeyword,
159 "protected" => DSyntaxKind::ProtectedKeyword,
160 "package" => DSyntaxKind::PackageKeyword,
161 "export" => DSyntaxKind::ExportKeyword,
162 "static" => DSyntaxKind::StaticKeyword,
163 "final" => DSyntaxKind::FinalKeyword,
164 "abstract" => DSyntaxKind::AbstractKeyword,
165 "override" => DSyntaxKind::OverrideKeyword,
166 "synchronized" => DSyntaxKind::SynchronizedKeyword,
167 "const" => DSyntaxKind::ConstKeyword,
168 "immutable" => DSyntaxKind::ImmutableKeyword,
169 "inout" => DSyntaxKind::InoutKeyword,
170 "shared" => DSyntaxKind::SharedKeyword,
171 "class" => DSyntaxKind::ClassKeyword,
172 "struct" => DSyntaxKind::StructKeyword,
173 "interface" => DSyntaxKind::InterfaceKeyword,
174 "union" => DSyntaxKind::UnionKeyword,
175 "enum" => DSyntaxKind::EnumKeyword,
176 "function" => DSyntaxKind::FunctionKeyword,
177 "delegate" => DSyntaxKind::DelegateKeyword,
178 "if" => DSyntaxKind::IfKeyword,
179 "else" => DSyntaxKind::ElseKeyword,
180 "while" => DSyntaxKind::WhileKeyword,
181 "for" => DSyntaxKind::ForKeyword,
182 "foreach" => DSyntaxKind::ForeachKeyword,
183 "do" => DSyntaxKind::DoKeyword,
184 "switch" => DSyntaxKind::SwitchKeyword,
185 "case" => DSyntaxKind::CaseKeyword,
186 "default" => DSyntaxKind::DefaultKeyword,
187 "break" => DSyntaxKind::BreakKeyword,
188 "continue" => DSyntaxKind::ContinueKeyword,
189 "return" => DSyntaxKind::ReturnKeyword,
190 "goto" => DSyntaxKind::GotoKeyword,
191 "try" => DSyntaxKind::TryKeyword,
192 "catch" => DSyntaxKind::CatchKeyword,
193 "finally" => DSyntaxKind::FinallyKeyword,
194 "throw" => DSyntaxKind::ThrowKeyword,
195 "scope" => DSyntaxKind::ScopeKeyword,
196 "with" => DSyntaxKind::WithKeyword,
197 "asm" => DSyntaxKind::AsmKeyword,
198 "mixin" => DSyntaxKind::MixinKeyword,
199 "template" => DSyntaxKind::TemplateKeyword,
200 "alias" => DSyntaxKind::AliasKeyword,
201 "typeof" => DSyntaxKind::TypeofKeyword,
202 "typeid" => DSyntaxKind::TypeidKeyword,
203 "is" => DSyntaxKind::IsKeyword,
204 "in" => DSyntaxKind::InKeyword,
205 "out" => DSyntaxKind::OutKeyword,
206 "ref" => DSyntaxKind::RefKeyword,
207 "lazy" => DSyntaxKind::LazyKeyword,
208 "auto" => DSyntaxKind::AutoKeyword,
209 "extern" => DSyntaxKind::ExternKeyword,
210 "align" => DSyntaxKind::AlignKeyword,
211 "pragma" => DSyntaxKind::PragmaKeyword,
212 "debug" => DSyntaxKind::DebugKeyword,
213 "version" => DSyntaxKind::VersionKeyword,
214 "unittest" => DSyntaxKind::UnitTestKeyword,
215 "invariant" => DSyntaxKind::InvariantKeyword,
216 "body" => DSyntaxKind::BodyKeyword,
217 "new" => DSyntaxKind::NewKeyword,
218 "delete" => DSyntaxKind::DeleteKeyword,
219 "this" => DSyntaxKind::ThisKeyword,
220 "super" => DSyntaxKind::SuperKeyword,
221 "null" => DSyntaxKind::NullKeyword,
222 "true" => DSyntaxKind::TrueKeyword,
223 "false" => DSyntaxKind::FalseKeyword,
224 "cast" => DSyntaxKind::CastKeyword,
225 "void" => DSyntaxKind::VoidType,
226 "bool" => DSyntaxKind::BoolType,
227 "byte" => DSyntaxKind::ByteType,
228 "ubyte" => DSyntaxKind::UbyteType,
229 "short" => DSyntaxKind::ShortType,
230 "ushort" => DSyntaxKind::UshortType,
231 "int" => DSyntaxKind::IntType,
232 "uint" => DSyntaxKind::UintType,
233 "long" => DSyntaxKind::LongType,
234 "ulong" => DSyntaxKind::UlongType,
235 "cent" => DSyntaxKind::CentType,
236 "ucent" => DSyntaxKind::UcentType,
237 "float" => DSyntaxKind::FloatType,
238 "double" => DSyntaxKind::DoubleType,
239 "real" => DSyntaxKind::RealType,
240 "ifloat" => DSyntaxKind::IfloatType,
241 "idouble" => DSyntaxKind::IdoubleType,
242 "ireal" => DSyntaxKind::IrealType,
243 "cfloat" => DSyntaxKind::CfloatType,
244 "cdouble" => DSyntaxKind::CdoubleType,
245 "creal" => DSyntaxKind::CrealType,
246 "char" => DSyntaxKind::CharType,
247 "wchar" => DSyntaxKind::WcharType,
248 "dchar" => DSyntaxKind::DcharType,
249 "string" => DSyntaxKind::StringType,
250 "wstring" => DSyntaxKind::WstringType,
251 "dstring" => DSyntaxKind::DstringType,
252 "typedef" => DSyntaxKind::TypedefKeyword,
253 "pure" => DSyntaxKind::PureKeyword,
254 "nothrow" => DSyntaxKind::NothrowKeyword,
255 "safe" => DSyntaxKind::SafeKeyword,
256 "trusted" => DSyntaxKind::TrustedKeyword,
257 "system" => DSyntaxKind::SystemKeyword,
258 "nogc" => DSyntaxKind::NogcKeyword,
259 "property" => DSyntaxKind::PropertyKeyword,
260 "disable" => DSyntaxKind::DisableKeyword,
261 "deprecated" => DSyntaxKind::DeprecatedKeyword,
262 _ => DSyntaxKind::Identifier,
263 };
264
265 state.add_token(kind, start_pos, end_pos);
266 return true;
267 }
268 }
269 false
270 }
271
272 fn lex_number(&self, state: &mut State) -> bool {
273 if let Some(ch) = state.peek() {
274 if ch.is_ascii_digit() {
275 let start_pos = state.get_position();
276
277 while let Some(ch) = state.peek() {
279 if ch.is_ascii_digit() || ch == '_' {
280 state.advance(ch.len_utf8());
281 }
282 else {
283 break;
284 }
285 }
286
287 if let Some('.') = state.peek() {
289 state.advance(1);
290 while let Some(ch) = state.peek() {
291 if ch.is_ascii_digit() || ch == '_' {
292 state.advance(ch.len_utf8());
293 }
294 else {
295 break;
296 }
297 }
298 }
299
300 if let Some(ch) = state.peek() {
302 if ch == 'e' || ch == 'E' {
303 state.advance(1);
304 if let Some(ch) = state.peek() {
305 if ch == '+' || ch == '-' {
306 state.advance(1);
307 }
308 }
309 while let Some(ch) = state.peek() {
310 if ch.is_ascii_digit() || ch == '_' {
311 state.advance(ch.len_utf8());
312 }
313 else {
314 break;
315 }
316 }
317 }
318 }
319
320 if let Some(ch) = state.peek() {
322 if ch == 'f' || ch == 'F' || ch == 'L' || ch == 'u' || ch == 'U' {
323 state.advance(1);
324 }
325 }
326
327 state.add_token(DSyntaxKind::IntegerLiteral, start_pos, state.get_position());
328 return true;
329 }
330 }
331 false
332 }
333
334 fn lex_string(&self, state: &mut State) -> bool {
335 if let Some(ch) = state.peek() {
336 if ch == '"' || ch == '\'' {
337 let start_pos = state.get_position();
338 let quote = ch;
339 state.advance(1); while let Some(ch) = state.peek() {
342 if ch == quote {
343 state.advance(1); break;
345 }
346 else if ch == '\\' {
347 state.advance(1); if state.peek().is_some() {
349 state.advance(1); }
351 }
352 else {
353 state.advance(ch.len_utf8());
354 }
355 }
356
357 state.add_token(DSyntaxKind::StringLiteral, start_pos, state.get_position());
358 return true;
359 }
360 }
361 false
362 }
363
364 fn lex_character(&self, state: &mut State) -> bool {
365 if let Some('\'') = state.peek() {
366 let start_pos = state.get_position();
367 state.advance(1); if let Some(ch) = state.peek() {
370 if ch == '\\' {
371 state.advance(1); if state.peek().is_some() {
373 state.advance(1); }
375 }
376 else {
377 state.advance(ch.len_utf8());
378 }
379 }
380
381 if let Some('\'') = state.peek() {
382 state.advance(1); }
384
385 state.add_token(DSyntaxKind::CharLiteral, start_pos, state.get_position());
386 return true;
387 }
388 false
389 }
390
391 fn lex_line_comment(&self, state: &mut State) -> bool {
392 if let Some('/') = state.peek() {
393 let current_pos = state.get_position();
395 state.advance(1);
396 if let Some('/') = state.peek() {
397 state.set_position(current_pos);
399 let start_pos = state.get_position();
400 state.advance(2); while let Some(ch) = state.peek() {
403 if ch == '\n' {
404 break;
405 }
406 state.advance(ch.len_utf8());
407 }
408
409 state.add_token(DSyntaxKind::LineComment, start_pos, state.get_position());
410 return true;
411 }
412 else {
413 state.set_position(current_pos);
415 }
416 }
417 false
418 }
419
420 fn lex_block_comment(&self, state: &mut State) -> bool {
421 if let Some('/') = state.peek() {
422 let current_pos = state.get_position();
424 state.advance(1);
425 if let Some('*') = state.peek() {
426 state.set_position(current_pos);
428 let start_pos = state.get_position();
429 state.advance(2); while let Some(ch) = state.peek() {
432 if ch == '*' {
433 state.advance(1);
434 if let Some('/') = state.peek() {
435 state.advance(1); break;
437 }
438 }
439 else {
440 state.advance(ch.len_utf8());
441 }
442 }
443
444 state.add_token(DSyntaxKind::BlockComment, start_pos, state.get_position());
445 return true;
446 }
447 else {
448 state.set_position(current_pos);
450 }
451 }
452 false
453 }
454
455 fn lex_nested_comment(&self, state: &mut State) -> bool {
456 if let Some('/') = state.peek() {
457 let current_pos = state.get_position();
459 state.advance(1);
460 if let Some('+') = state.peek() {
461 let start_pos = state.get_position();
463 state.advance(2); let mut depth = 1;
466 while depth > 0 {
467 let Some(ch) = state.peek()
468 else {
469 break;
470 };
471 if ch == '/' {
472 state.advance(1);
473 if let Some('+') = state.peek() {
474 state.advance(1);
475 depth += 1;
476 }
477 }
478 else if ch == '+' {
479 state.advance(1);
480 if let Some('/') = state.peek() {
481 state.advance(1);
482 depth -= 1;
483 }
484 }
485 else {
486 state.advance(ch.len_utf8());
487 }
488 }
489
490 state.add_token(DSyntaxKind::NestedComment, start_pos, state.get_position());
491 return true;
492 }
493 else {
494 state.set_position(current_pos);
496 }
497 }
498 false
499 }
500
501 fn lex_operator(&self, state: &mut State) -> bool {
502 if let Some(ch) = state.peek() {
503 let start_pos = state.get_position();
504
505 match ch {
506 '+' => {
507 state.advance(1);
508 if let Some('=') = state.peek() {
509 state.advance(1);
510 state.add_token(DSyntaxKind::PlusAssign, start_pos, state.get_position());
511 }
512 else if let Some('+') = state.peek() {
513 state.advance(1);
514 state.add_token(DSyntaxKind::Increment, start_pos, state.get_position());
515 }
516 else {
517 state.add_token(DSyntaxKind::Plus, start_pos, state.get_position());
518 }
519 return true;
520 }
521 '-' => {
522 state.advance(1);
523 if let Some('=') = state.peek() {
524 state.advance(1);
525 state.add_token(DSyntaxKind::MinusAssign, start_pos, state.get_position());
526 }
527 else if let Some('-') = state.peek() {
528 state.advance(1);
529 state.add_token(DSyntaxKind::Decrement, start_pos, state.get_position());
530 }
531 else {
532 state.add_token(DSyntaxKind::Minus, start_pos, state.get_position());
533 }
534 return true;
535 }
536 '*' => {
537 state.advance(1);
538 if let Some('=') = state.peek() {
539 state.advance(1);
540 state.add_token(DSyntaxKind::MultiplyAssign, start_pos, state.get_position());
541 }
542 else {
543 state.add_token(DSyntaxKind::Multiply, start_pos, state.get_position());
544 }
545 return true;
546 }
547 '/' => {
548 return false;
550 }
551 '%' => {
552 state.advance(1);
553 if let Some('=') = state.peek() {
554 state.advance(1);
555 state.add_token(DSyntaxKind::ModuloAssign, start_pos, state.get_position());
556 }
557 else {
558 state.add_token(DSyntaxKind::Modulo, start_pos, state.get_position());
559 }
560 return true;
561 }
562 '&' => {
563 state.advance(1);
564 if let Some('&') = state.peek() {
565 state.advance(1);
566 state.add_token(DSyntaxKind::LogicalAnd, start_pos, state.get_position());
567 }
568 else if let Some('=') = state.peek() {
569 state.advance(1);
570 state.add_token(DSyntaxKind::BitwiseAndAssign, start_pos, state.get_position());
571 }
572 else {
573 state.add_token(DSyntaxKind::BitwiseAnd, start_pos, state.get_position());
574 }
575 return true;
576 }
577 '|' => {
578 state.advance(1);
579 if let Some('|') = state.peek() {
580 state.advance(1);
581 state.add_token(DSyntaxKind::LogicalOr, start_pos, state.get_position());
582 }
583 else if let Some('=') = state.peek() {
584 state.advance(1);
585 state.add_token(DSyntaxKind::BitwiseOrAssign, start_pos, state.get_position());
586 }
587 else {
588 state.add_token(DSyntaxKind::BitwiseOr, start_pos, state.get_position());
589 }
590 return true;
591 }
592 '^' => {
593 state.advance(1);
594 if let Some('=') = state.peek() {
595 state.advance(1);
596 state.add_token(DSyntaxKind::BitwiseXorAssign, start_pos, state.get_position());
597 }
598 else {
599 state.add_token(DSyntaxKind::BitwiseXor, start_pos, state.get_position());
600 }
601 return true;
602 }
603 '~' => {
604 state.advance(1);
605 if let Some('=') = state.peek() {
606 state.advance(1);
607 state.add_token(DSyntaxKind::ConcatenateAssign, start_pos, state.get_position());
608 }
609 else {
610 state.add_token(DSyntaxKind::BitwiseNot, start_pos, state.get_position());
611 }
612 return true;
613 }
614 '!' => {
615 state.advance(1);
616 if let Some('=') = state.peek() {
617 state.advance(1);
618 state.add_token(DSyntaxKind::NotEqual, start_pos, state.get_position());
619 }
620 else {
621 state.add_token(DSyntaxKind::Not, start_pos, state.get_position());
622 }
623 return true;
624 }
625 '<' => {
626 state.advance(1);
627 if let Some('<') = state.peek() {
628 state.advance(1);
629 if let Some('=') = state.peek() {
630 state.advance(1);
631 state.add_token(DSyntaxKind::LeftShiftAssign, start_pos, state.get_position());
632 }
633 else {
634 state.add_token(DSyntaxKind::LeftShift, start_pos, state.get_position());
635 }
636 }
637 else if let Some('=') = state.peek() {
638 state.advance(1);
639 state.add_token(DSyntaxKind::LessEqual, start_pos, state.get_position());
640 }
641 else {
642 state.add_token(DSyntaxKind::Less, start_pos, state.get_position());
643 }
644 return true;
645 }
646 '>' => {
647 state.advance(1);
648 if let Some('>') = state.peek() {
649 state.advance(1);
650 if let Some('=') = state.peek() {
651 state.advance(1);
652 state.add_token(DSyntaxKind::RightShiftAssign, start_pos, state.get_position());
653 }
654 else {
655 state.add_token(DSyntaxKind::RightShift, start_pos, state.get_position());
656 }
657 }
658 else if let Some('=') = state.peek() {
659 state.advance(1);
660 state.add_token(DSyntaxKind::GreaterEqual, start_pos, state.get_position());
661 }
662 else {
663 state.add_token(DSyntaxKind::Greater, start_pos, state.get_position());
664 }
665 return true;
666 }
667 '=' => {
668 state.advance(1);
669 if let Some('=') = state.peek() {
670 state.advance(1);
671 state.add_token(DSyntaxKind::Equal, start_pos, state.get_position());
672 }
673 else {
674 state.add_token(DSyntaxKind::Assign, start_pos, state.get_position());
675 }
676 return true;
677 }
678 _ => false,
679 }
680 }
681 else {
682 false
683 }
684 }
685
686 fn lex_delimiter(&self, state: &mut State) -> bool {
687 if let Some(ch) = state.peek() {
688 let start_pos = state.get_position();
689
690 match ch {
691 '(' => {
692 state.advance(1);
693 state.add_token(DSyntaxKind::LeftParen, start_pos, state.get_position());
694 return true;
695 }
696 ')' => {
697 state.advance(1);
698 state.add_token(DSyntaxKind::RightParen, start_pos, state.get_position());
699 return true;
700 }
701 '[' => {
702 state.advance(1);
703 state.add_token(DSyntaxKind::LeftBracket, start_pos, state.get_position());
704 return true;
705 }
706 ']' => {
707 state.advance(1);
708 state.add_token(DSyntaxKind::RightBracket, start_pos, state.get_position());
709 return true;
710 }
711 '{' => {
712 state.advance(1);
713 state.add_token(DSyntaxKind::LeftBrace, start_pos, state.get_position());
714 return true;
715 }
716 '}' => {
717 state.advance(1);
718 state.add_token(DSyntaxKind::RightBrace, start_pos, state.get_position());
719 return true;
720 }
721 ';' => {
722 state.advance(1);
723 state.add_token(DSyntaxKind::Semicolon, start_pos, state.get_position());
724 return true;
725 }
726 ',' => {
727 state.advance(1);
728 state.add_token(DSyntaxKind::Comma, start_pos, state.get_position());
729 return true;
730 }
731 '.' => {
732 state.advance(1);
733 state.add_token(DSyntaxKind::Dot, start_pos, state.get_position());
734 return true;
735 }
736 ':' => {
737 state.advance(1);
738 state.add_token(DSyntaxKind::Colon, start_pos, state.get_position());
739 return true;
740 }
741 '?' => {
742 state.advance(1);
743 state.add_token(DSyntaxKind::Question, start_pos, state.get_position());
744 return true;
745 }
746 _ => false,
747 }
748 }
749 else {
750 false
751 }
752 }
753}