1use crate::{kind::DSyntaxKind, language::DLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, TextEdit, lexer::LexOutput, source::Source};
3
/// Shorthand for `LexerState` with the language parameter fixed to `DLanguage`.
type State<'a, S> = LexerState<'a, S, DLanguage>;
5
/// Lexer that turns D source text into `DSyntaxKind` tokens.
///
/// Borrows its `DLanguage` configuration for the lifetime `'config`.
#[derive(Clone)]
pub struct DLexer<'config> {
    /// Language configuration supplied at construction. The leading underscore
    /// marks it as not yet read by the lexing code visible in this file.
    _config: &'config DLanguage,
}
11
12impl<'config> Lexer<DLanguage> for DLexer<'config> {
13 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DLanguage>) -> LexOutput<DLanguage> {
14 let mut state = LexerState::new(source);
15 let result = self.run(&mut state);
16 if result.is_ok() {
17 state.add_eof();
18 }
19 state.finish_with_cache(result, cache)
20 }
21}
22
23impl<'config> DLexer<'config> {
24 pub fn new(config: &'config DLanguage) -> Self {
25 Self { _config: config }
26 }
27
28 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
29 while state.not_at_end() {
30 let start_pos = state.get_position();
31
32 if self.skip_whitespace(state) {
34 continue;
35 }
36
37 if self.lex_newline(state) {
38 continue;
39 }
40
41 if self.lex_line_comment(state) {
42 continue;
43 }
44
45 if self.lex_block_comment(state) {
46 continue;
47 }
48
49 if self.lex_nested_comment(state) {
50 continue;
51 }
52
53 if self.lex_identifier_or_keyword(state) {
54 continue;
55 }
56
57 if self.lex_number(state) {
58 continue;
59 }
60
61 if self.lex_string(state) {
62 continue;
63 }
64
65 if self.lex_character(state) {
66 continue;
67 }
68
69 if self.lex_operator(state) {
70 continue;
71 }
72
73 if self.lex_delimiter(state) {
74 continue;
75 }
76
77 state.advance_if_dead_lock(start_pos);
79 if state.get_position() > start_pos {
80 state.add_token(DSyntaxKind::Error, start_pos, state.get_position());
81 }
82 }
83 Ok(())
84 }
85
86 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
87 if let Some(ch) = state.peek() {
88 if ch.is_whitespace() && ch != '\n' && ch != '\r' {
89 let start_pos = state.get_position();
90 while let Some(ch) = state.peek() {
91 if !ch.is_whitespace() || ch == '\n' || ch == '\r' {
92 break;
93 }
94 state.advance(ch.len_utf8());
95 }
96 state.add_token(DSyntaxKind::Whitespace, start_pos, state.get_position());
97 return true;
98 }
99 }
100 false
101 }
102
103 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
104 if let Some(ch) = state.peek() {
105 if ch == '\n' || ch == '\r' {
106 let start_pos = state.get_position();
107 if ch == '\r' {
108 state.advance(1);
109 if state.peek() == Some('\n') {
110 state.advance(1);
111 }
112 }
113 else {
114 state.advance(1);
115 }
116 state.add_token(DSyntaxKind::Newline, start_pos, state.get_position());
117 return true;
118 }
119 }
120 false
121 }
122
123 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
124 if let Some(ch) = state.peek() {
125 if ch.is_alphabetic() || ch == '_' {
126 let start_pos = state.get_position();
127 while let Some(ch) = state.peek() {
128 if ch.is_alphanumeric() || ch == '_' {
129 state.advance(ch.len_utf8());
130 }
131 else {
132 break;
133 }
134 }
135 let end_pos = state.get_position();
136 let text = state.get_text_in((start_pos..end_pos).into());
137
138 let kind = match text.as_ref() {
139 "module" => DSyntaxKind::ModuleKeyword,
140 "import" => DSyntaxKind::ImportKeyword,
141 "public" => DSyntaxKind::PublicKeyword,
142 "private" => DSyntaxKind::PrivateKeyword,
143 "protected" => DSyntaxKind::ProtectedKeyword,
144 "package" => DSyntaxKind::PackageKeyword,
145 "export" => DSyntaxKind::ExportKeyword,
146 "static" => DSyntaxKind::StaticKeyword,
147 "final" => DSyntaxKind::FinalKeyword,
148 "abstract" => DSyntaxKind::AbstractKeyword,
149 "override" => DSyntaxKind::OverrideKeyword,
150 "synchronized" => DSyntaxKind::SynchronizedKeyword,
151 "const" => DSyntaxKind::ConstKeyword,
152 "immutable" => DSyntaxKind::ImmutableKeyword,
153 "inout" => DSyntaxKind::InoutKeyword,
154 "shared" => DSyntaxKind::SharedKeyword,
155 "class" => DSyntaxKind::ClassKeyword,
156 "struct" => DSyntaxKind::StructKeyword,
157 "interface" => DSyntaxKind::InterfaceKeyword,
158 "union" => DSyntaxKind::UnionKeyword,
159 "enum" => DSyntaxKind::EnumKeyword,
160 "function" => DSyntaxKind::FunctionKeyword,
161 "delegate" => DSyntaxKind::DelegateKeyword,
162 "if" => DSyntaxKind::IfKeyword,
163 "else" => DSyntaxKind::ElseKeyword,
164 "while" => DSyntaxKind::WhileKeyword,
165 "for" => DSyntaxKind::ForKeyword,
166 "foreach" => DSyntaxKind::ForeachKeyword,
167 "do" => DSyntaxKind::DoKeyword,
168 "switch" => DSyntaxKind::SwitchKeyword,
169 "case" => DSyntaxKind::CaseKeyword,
170 "default" => DSyntaxKind::DefaultKeyword,
171 "break" => DSyntaxKind::BreakKeyword,
172 "continue" => DSyntaxKind::ContinueKeyword,
173 "return" => DSyntaxKind::ReturnKeyword,
174 "goto" => DSyntaxKind::GotoKeyword,
175 "try" => DSyntaxKind::TryKeyword,
176 "catch" => DSyntaxKind::CatchKeyword,
177 "finally" => DSyntaxKind::FinallyKeyword,
178 "throw" => DSyntaxKind::ThrowKeyword,
179 "scope" => DSyntaxKind::ScopeKeyword,
180 "with" => DSyntaxKind::WithKeyword,
181 "asm" => DSyntaxKind::AsmKeyword,
182 "mixin" => DSyntaxKind::MixinKeyword,
183 "template" => DSyntaxKind::TemplateKeyword,
184 "alias" => DSyntaxKind::AliasKeyword,
185 "typeof" => DSyntaxKind::TypeofKeyword,
186 "typeid" => DSyntaxKind::TypeidKeyword,
187 "is" => DSyntaxKind::IsKeyword,
188 "in" => DSyntaxKind::InKeyword,
189 "out" => DSyntaxKind::OutKeyword,
190 "ref" => DSyntaxKind::RefKeyword,
191 "lazy" => DSyntaxKind::LazyKeyword,
192 "auto" => DSyntaxKind::AutoKeyword,
193 "extern" => DSyntaxKind::ExternKeyword,
194 "align" => DSyntaxKind::AlignKeyword,
195 "pragma" => DSyntaxKind::PragmaKeyword,
196 "debug" => DSyntaxKind::DebugKeyword,
197 "version" => DSyntaxKind::VersionKeyword,
198 "unittest" => DSyntaxKind::UnitTestKeyword,
199 "invariant" => DSyntaxKind::InvariantKeyword,
200 "body" => DSyntaxKind::BodyKeyword,
201 "new" => DSyntaxKind::NewKeyword,
202 "delete" => DSyntaxKind::DeleteKeyword,
203 "this" => DSyntaxKind::ThisKeyword,
204 "super" => DSyntaxKind::SuperKeyword,
205 "null" => DSyntaxKind::NullKeyword,
206 "true" => DSyntaxKind::TrueKeyword,
207 "false" => DSyntaxKind::FalseKeyword,
208 "cast" => DSyntaxKind::CastKeyword,
209 "void" => DSyntaxKind::VoidType,
210 "bool" => DSyntaxKind::BoolType,
211 "byte" => DSyntaxKind::ByteType,
212 "ubyte" => DSyntaxKind::UbyteType,
213 "short" => DSyntaxKind::ShortType,
214 "ushort" => DSyntaxKind::UshortType,
215 "int" => DSyntaxKind::IntType,
216 "uint" => DSyntaxKind::UintType,
217 "long" => DSyntaxKind::LongType,
218 "ulong" => DSyntaxKind::UlongType,
219 "cent" => DSyntaxKind::CentType,
220 "ucent" => DSyntaxKind::UcentType,
221 "float" => DSyntaxKind::FloatType,
222 "double" => DSyntaxKind::DoubleType,
223 "real" => DSyntaxKind::RealType,
224 "ifloat" => DSyntaxKind::IfloatType,
225 "idouble" => DSyntaxKind::IdoubleType,
226 "ireal" => DSyntaxKind::IrealType,
227 "cfloat" => DSyntaxKind::CfloatType,
228 "cdouble" => DSyntaxKind::CdoubleType,
229 "creal" => DSyntaxKind::CrealType,
230 "char" => DSyntaxKind::CharType,
231 "wchar" => DSyntaxKind::WcharType,
232 "dchar" => DSyntaxKind::DcharType,
233 "string" => DSyntaxKind::StringType,
234 "wstring" => DSyntaxKind::WstringType,
235 "dstring" => DSyntaxKind::DstringType,
236 "typedef" => DSyntaxKind::TypedefKeyword,
237 "pure" => DSyntaxKind::PureKeyword,
238 "nothrow" => DSyntaxKind::NothrowKeyword,
239 "safe" => DSyntaxKind::SafeKeyword,
240 "trusted" => DSyntaxKind::TrustedKeyword,
241 "system" => DSyntaxKind::SystemKeyword,
242 "nogc" => DSyntaxKind::NogcKeyword,
243 "property" => DSyntaxKind::PropertyKeyword,
244 "disable" => DSyntaxKind::DisableKeyword,
245 "deprecated" => DSyntaxKind::DeprecatedKeyword,
246 _ => DSyntaxKind::Identifier,
247 };
248
249 state.add_token(kind, start_pos, end_pos);
250 return true;
251 }
252 }
253 false
254 }
255
256 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
257 if let Some(ch) = state.peek() {
258 if ch.is_ascii_digit() {
259 let start_pos = state.get_position();
260
261 while let Some(ch) = state.peek() {
263 if ch.is_ascii_digit() || ch == '_' {
264 state.advance(ch.len_utf8());
265 }
266 else {
267 break;
268 }
269 }
270
271 if let Some('.') = state.peek() {
273 state.advance(1);
274 while let Some(ch) = state.peek() {
275 if ch.is_ascii_digit() || ch == '_' {
276 state.advance(ch.len_utf8());
277 }
278 else {
279 break;
280 }
281 }
282 }
283
284 if let Some(ch) = state.peek() {
286 if ch == 'e' || ch == 'E' {
287 state.advance(1);
288 if let Some(ch) = state.peek() {
289 if ch == '+' || ch == '-' {
290 state.advance(1);
291 }
292 }
293 while let Some(ch) = state.peek() {
294 if ch.is_ascii_digit() || ch == '_' {
295 state.advance(ch.len_utf8());
296 }
297 else {
298 break;
299 }
300 }
301 }
302 }
303
304 if let Some(ch) = state.peek() {
306 if ch == 'f' || ch == 'F' || ch == 'L' || ch == 'u' || ch == 'U' {
307 state.advance(1);
308 }
309 }
310
311 state.add_token(DSyntaxKind::IntegerLiteral, start_pos, state.get_position());
312 return true;
313 }
314 }
315 false
316 }
317
318 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
319 if let Some(ch) = state.peek() {
320 if ch == '"' || ch == '\'' {
321 let start_pos = state.get_position();
322 let quote = ch;
323 state.advance(1); while let Some(ch) = state.peek() {
326 if ch == quote {
327 state.advance(1); break;
329 }
330 else if ch == '\\' {
331 state.advance(1); if state.peek().is_some() {
333 state.advance(1); }
335 }
336 else {
337 state.advance(ch.len_utf8());
338 }
339 }
340
341 state.add_token(DSyntaxKind::StringLiteral, start_pos, state.get_position());
342 return true;
343 }
344 }
345 false
346 }
347
348 fn lex_character<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
349 if let Some('\'') = state.peek() {
350 let start_pos = state.get_position();
351 state.advance(1); if let Some(ch) = state.peek() {
354 if ch == '\\' {
355 state.advance(1); if state.peek().is_some() {
357 state.advance(1); }
359 }
360 else {
361 state.advance(ch.len_utf8());
362 }
363 }
364
365 if let Some('\'') = state.peek() {
366 state.advance(1); }
368
369 state.add_token(DSyntaxKind::CharLiteral, start_pos, state.get_position());
370 return true;
371 }
372 false
373 }
374
375 fn lex_line_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
376 if let Some('/') = state.peek() {
377 if let Some('/') = state.peek_next_n(1) {
378 let start_pos = state.get_position();
379 state.advance(2);
380 while let Some(ch) = state.peek() {
381 if ch == '\n' || ch == '\r' {
382 break;
383 }
384 state.advance(ch.len_utf8());
385 }
386 state.add_token(DSyntaxKind::LineComment, start_pos, state.get_position());
387 return true;
388 }
389 }
390 false
391 }
392
393 fn lex_block_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
394 if let Some('/') = state.peek() {
395 if let Some('*') = state.peek_next_n(1) {
396 let start_pos = state.get_position();
397 state.advance(2);
398 while let Some(ch) = state.peek() {
399 if ch == '*' {
400 state.advance(1);
401 if state.peek() == Some('/') {
402 state.advance(1);
403 break;
404 }
405 }
406 else {
407 state.advance(ch.len_utf8());
408 }
409 }
410 state.add_token(DSyntaxKind::BlockComment, start_pos, state.get_position());
411 return true;
412 }
413 }
414 false
415 }
416
417 fn lex_nested_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
418 if let Some('/') = state.peek() {
419 if let Some('+') = state.peek_next_n(1) {
420 let start_pos = state.get_position();
421 state.advance(2);
422 let mut depth = 1;
423 while let Some(ch) = state.peek() {
424 if ch == '/' {
425 state.advance(1);
426 if state.peek() == Some('+') {
427 state.advance(1);
428 depth += 1;
429 }
430 }
431 else if ch == '+' {
432 state.advance(1);
433 if state.peek() == Some('/') {
434 state.advance(1);
435 depth -= 1;
436 if depth == 0 {
437 break;
438 }
439 }
440 }
441 else {
442 state.advance(ch.len_utf8());
443 }
444 }
445 state.add_token(DSyntaxKind::NestedComment, start_pos, state.get_position());
446 return true;
447 }
448 }
449 false
450 }
451
452 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
453 if let Some(ch) = state.peek() {
454 let start_pos = state.get_position();
455
456 match ch {
457 '+' => {
458 state.advance(1);
459 if let Some('=') = state.peek() {
460 state.advance(1);
461 state.add_token(DSyntaxKind::PlusAssign, start_pos, state.get_position());
462 }
463 else if let Some('+') = state.peek() {
464 state.advance(1);
465 state.add_token(DSyntaxKind::Increment, start_pos, state.get_position());
466 }
467 else {
468 state.add_token(DSyntaxKind::Plus, start_pos, state.get_position());
469 }
470 return true;
471 }
472 '-' => {
473 state.advance(1);
474 if let Some('=') = state.peek() {
475 state.advance(1);
476 state.add_token(DSyntaxKind::MinusAssign, start_pos, state.get_position());
477 }
478 else if let Some('-') = state.peek() {
479 state.advance(1);
480 state.add_token(DSyntaxKind::Decrement, start_pos, state.get_position());
481 }
482 else {
483 state.add_token(DSyntaxKind::Minus, start_pos, state.get_position());
484 }
485 return true;
486 }
487 '*' => {
488 state.advance(1);
489 if let Some('=') = state.peek() {
490 state.advance(1);
491 state.add_token(DSyntaxKind::MultiplyAssign, start_pos, state.get_position());
492 }
493 else {
494 state.add_token(DSyntaxKind::Multiply, start_pos, state.get_position());
495 }
496 return true;
497 }
498 '/' => {
499 return false;
501 }
502 '%' => {
503 state.advance(1);
504 if let Some('=') = state.peek() {
505 state.advance(1);
506 state.add_token(DSyntaxKind::ModuloAssign, start_pos, state.get_position());
507 }
508 else {
509 state.add_token(DSyntaxKind::Modulo, start_pos, state.get_position());
510 }
511 return true;
512 }
513 '&' => {
514 state.advance(1);
515 if let Some('&') = state.peek() {
516 state.advance(1);
517 state.add_token(DSyntaxKind::LogicalAnd, start_pos, state.get_position());
518 }
519 else if let Some('=') = state.peek() {
520 state.advance(1);
521 state.add_token(DSyntaxKind::BitwiseAndAssign, start_pos, state.get_position());
522 }
523 else {
524 state.add_token(DSyntaxKind::BitwiseAnd, start_pos, state.get_position());
525 }
526 return true;
527 }
528 '|' => {
529 state.advance(1);
530 if let Some('|') = state.peek() {
531 state.advance(1);
532 state.add_token(DSyntaxKind::LogicalOr, start_pos, state.get_position());
533 }
534 else if let Some('=') = state.peek() {
535 state.advance(1);
536 state.add_token(DSyntaxKind::BitwiseOrAssign, start_pos, state.get_position());
537 }
538 else {
539 state.add_token(DSyntaxKind::BitwiseOr, start_pos, state.get_position());
540 }
541 return true;
542 }
543 '^' => {
544 state.advance(1);
545 if let Some('=') = state.peek() {
546 state.advance(1);
547 state.add_token(DSyntaxKind::BitwiseXorAssign, start_pos, state.get_position());
548 }
549 else {
550 state.add_token(DSyntaxKind::BitwiseXor, start_pos, state.get_position());
551 }
552 return true;
553 }
554 '~' => {
555 state.advance(1);
556 if let Some('=') = state.peek() {
557 state.advance(1);
558 state.add_token(DSyntaxKind::ConcatenateAssign, start_pos, state.get_position());
559 }
560 else {
561 state.add_token(DSyntaxKind::BitwiseNot, start_pos, state.get_position());
562 }
563 return true;
564 }
565 '!' => {
566 state.advance(1);
567 if let Some('=') = state.peek() {
568 state.advance(1);
569 state.add_token(DSyntaxKind::NotEqual, start_pos, state.get_position());
570 }
571 else {
572 state.add_token(DSyntaxKind::Not, start_pos, state.get_position());
573 }
574 return true;
575 }
576 '<' => {
577 state.advance(1);
578 if let Some('<') = state.peek() {
579 state.advance(1);
580 if let Some('=') = state.peek() {
581 state.advance(1);
582 state.add_token(DSyntaxKind::LeftShiftAssign, start_pos, state.get_position());
583 }
584 else {
585 state.add_token(DSyntaxKind::LeftShift, start_pos, state.get_position());
586 }
587 }
588 else if let Some('=') = state.peek() {
589 state.advance(1);
590 state.add_token(DSyntaxKind::LessEqual, start_pos, state.get_position());
591 }
592 else {
593 state.add_token(DSyntaxKind::Less, start_pos, state.get_position());
594 }
595 return true;
596 }
597 '>' => {
598 state.advance(1);
599 if let Some('>') = state.peek() {
600 state.advance(1);
601 if let Some('=') = state.peek() {
602 state.advance(1);
603 state.add_token(DSyntaxKind::RightShiftAssign, start_pos, state.get_position());
604 }
605 else {
606 state.add_token(DSyntaxKind::RightShift, start_pos, state.get_position());
607 }
608 }
609 else if let Some('=') = state.peek() {
610 state.advance(1);
611 state.add_token(DSyntaxKind::GreaterEqual, start_pos, state.get_position());
612 }
613 else {
614 state.add_token(DSyntaxKind::Greater, start_pos, state.get_position());
615 }
616 return true;
617 }
618 '=' => {
619 state.advance(1);
620 if let Some('=') = state.peek() {
621 state.advance(1);
622 state.add_token(DSyntaxKind::Equal, start_pos, state.get_position());
623 }
624 else {
625 state.add_token(DSyntaxKind::Assign, start_pos, state.get_position());
626 }
627 return true;
628 }
629 _ => false,
630 }
631 }
632 else {
633 false
634 }
635 }
636
637 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
638 if let Some(ch) = state.peek() {
639 let start_pos = state.get_position();
640
641 match ch {
642 '(' => {
643 state.advance(1);
644 state.add_token(DSyntaxKind::LeftParen, start_pos, state.get_position());
645 return true;
646 }
647 ')' => {
648 state.advance(1);
649 state.add_token(DSyntaxKind::RightParen, start_pos, state.get_position());
650 return true;
651 }
652 '[' => {
653 state.advance(1);
654 state.add_token(DSyntaxKind::LeftBracket, start_pos, state.get_position());
655 return true;
656 }
657 ']' => {
658 state.advance(1);
659 state.add_token(DSyntaxKind::RightBracket, start_pos, state.get_position());
660 return true;
661 }
662 '{' => {
663 state.advance(1);
664 state.add_token(DSyntaxKind::LeftBrace, start_pos, state.get_position());
665 return true;
666 }
667 '}' => {
668 state.advance(1);
669 state.add_token(DSyntaxKind::RightBrace, start_pos, state.get_position());
670 return true;
671 }
672 ';' => {
673 state.advance(1);
674 state.add_token(DSyntaxKind::Semicolon, start_pos, state.get_position());
675 return true;
676 }
677 ',' => {
678 state.advance(1);
679 state.add_token(DSyntaxKind::Comma, start_pos, state.get_position());
680 return true;
681 }
682 '.' => {
683 state.advance(1);
684 state.add_token(DSyntaxKind::Dot, start_pos, state.get_position());
685 return true;
686 }
687 ':' => {
688 state.advance(1);
689 state.add_token(DSyntaxKind::Colon, start_pos, state.get_position());
690 return true;
691 }
692 '?' => {
693 state.advance(1);
694 state.add_token(DSyntaxKind::Question, start_pos, state.get_position());
695 return true;
696 }
697 _ => false,
698 }
699 }
700 else {
701 false
702 }
703 }
704}