1#![doc = include_str!("readme.md")]
2
3use crate::language::VbNetLanguage;
4
5pub mod token_type;
7
8use oak_core::{
9 Lexer, LexerCache, LexerState,
10 lexer::LexOutput,
11 source::{Source, TextEdit},
12};
13pub use token_type::VbNetTokenType;
14
/// Shorthand for the core lexer state specialized to the VB.NET language.
pub(crate) type State<'a, S> = LexerState<'a, S, VbNetLanguage>;
16
/// Hand-written lexer for VB.NET source text.
///
/// All per-run scanning state lives in a [`State`]; the lexer itself only
/// borrows the language configuration.
pub struct VbNetLexer<'config> {
    // Language configuration the lexer was constructed with. Held for the
    // lexer's lifetime; not read by the scanning routines in this file.
    config: &'config VbNetLanguage,
}
21
22impl<'config> VbNetLexer<'config> {
    /// Creates a lexer that borrows the given language configuration.
    pub fn new(config: &'config VbNetLanguage) -> Self {
        Self { config }
    }
27
28 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
30 let start_pos = state.get_position();
31
32 while let Some(ch) = state.peek() {
33 if ch == ' ' || ch == '\t' {
34 state.advance(ch.len_utf8());
35 }
36 else {
37 break;
38 }
39 }
40
41 if state.get_position() > start_pos {
42 state.add_token(VbNetTokenType::Whitespace, start_pos, state.get_position());
43 true
44 }
45 else {
46 false
47 }
48 }
49
50 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
52 let start_pos = state.get_position();
53
54 if let Some('\n') = state.peek() {
55 state.advance(1);
56 state.add_token(VbNetTokenType::Newline, start_pos, state.get_position());
57 true
58 }
59 else if let Some('\r') = state.peek() {
60 state.advance(1);
61 if let Some('\n') = state.peek() {
62 state.advance(1);
63 }
64 state.add_token(VbNetTokenType::Newline, start_pos, state.get_position());
65 true
66 }
67 else {
68 false
69 }
70 }
71
72 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
74 let start_pos = state.get_position();
75
76 if let Some('\'') = state.peek() {
77 state.advance(1);
78 if let Some('\'') = state.peek() {
80 state.advance(1);
81 if let Some('\'') = state.peek() {
82 state.advance(1);
83 while state.not_at_end() {
85 if let Some('\'') = state.peek() {
86 state.advance(1);
87 if let Some('\'') = state.peek() {
88 state.advance(1);
89 if let Some('\'') = state.peek() {
90 state.advance(1);
91 break;
92 }
93 }
94 }
95 else {
96 state.advance(1);
97 }
98 }
99 state.add_token(VbNetTokenType::BlockComment, start_pos, state.get_position());
100 return true;
101 }
102 else {
103 state.set_position(start_pos);
105 return false;
106 }
107 }
108 else {
109 while let Some(ch) = state.peek() {
111 if ch == '\n' || ch == '\r' {
112 break;
113 }
114 state.advance(ch.len_utf8());
115 }
116 state.add_token(VbNetTokenType::LineComment, start_pos, state.get_position());
117 return true;
118 }
119 }
120 false
121 }
122
123 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
126 let start_pos = state.get_position();
127
128 if let Some('"') = state.peek() {
129 state.advance(1);
130 while let Some(ch) = state.peek() {
131 if ch == '"' {
132 state.advance(1);
134 if let Some('"') = state.peek() {
135 state.advance(1);
136 continue;
137 }
138 break;
139 }
140 else {
141 state.advance(ch.len_utf8())
142 }
143 }
144 state.add_token(VbNetTokenType::StringLiteral, start_pos, state.get_position());
145 true
146 }
147 else {
148 false
149 }
150 }
151
152 fn lex_char<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
154 let start_pos = state.get_position();
155
156 if let Some('\'') = state.peek() {
157 state.advance(1);
158 if let Some(ch) = state.peek() {
159 if ch != '\'' {
160 state.advance(ch.len_utf8());
161 if let Some('\\') = state.peek() {
163 state.advance(1);
164 if let Some(_) = state.peek() {
165 state.advance(1)
166 }
167 }
168 if let Some('\'') = state.peek() {
170 state.advance(1);
171 state.add_token(VbNetTokenType::CharLiteral, start_pos, state.get_position());
172 return true;
173 }
174 }
175 }
176 state.set_position(start_pos);
178 false
179 }
180 else {
181 false
182 }
183 }
184
185 fn lex_date<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
187 let start_pos = state.get_position();
188
189 if let Some('#') = state.peek() {
190 state.advance(1);
191 while let Some(ch) = state.peek() {
192 if ch == '#' {
193 state.advance(1);
194 break;
195 }
196 state.advance(ch.len_utf8());
197 }
198 state.add_token(VbNetTokenType::DateLiteral, start_pos, state.get_position());
199 true
200 }
201 else {
202 false
203 }
204 }
205
206 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
213 let start_pos = state.get_position();
214
215 if let Some(ch) = state.peek() {
216 if ch.is_numeric() {
217 state.advance(ch.len_utf8());
218
219 let mut has_dot = false;
220 let mut has_e = false;
221
222 while let Some(ch) = state.peek() {
223 if ch.is_numeric() {
224 state.advance(ch.len_utf8());
225 }
226 else if ch == '.' && !has_dot {
227 has_dot = true;
228 state.advance(1);
229 }
230 else if (ch == 'e' || ch == 'E') && !has_e {
231 has_e = true;
232 state.advance(1);
233 if let Some(ch) = state.peek() {
235 if ch == '+' || ch == '-' {
236 state.advance(1);
237 }
238 }
239 }
240 else if ch == '_' {
241 state.advance(1);
243 }
244 else {
245 break;
246 }
247 }
248
249 let token_type = if has_dot || has_e { VbNetTokenType::FloatLiteral } else { VbNetTokenType::IntegerLiteral };
250
251 state.add_token(token_type, start_pos, state.get_position());
252 true
253 }
254 else {
255 false
256 }
257 }
258 else {
259 false
260 }
261 }
262
263 fn lex_keyword_or_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
271 let start_pos = state.get_position();
272
273 if let Some(ch) = state.peek() {
274 if ch.is_alphabetic() || ch == '_' {
275 state.advance(ch.len_utf8());
276
277 while let Some(ch) = state.peek() {
278 if ch.is_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
279 }
280
281 let text = state.get_text_in((start_pos..state.get_position()).into());
282 let token_type = match text.as_ref() {
283 "Namespace" => VbNetTokenType::Namespace,
284 "Imports" => VbNetTokenType::Imports,
285 "Class" => VbNetTokenType::Class,
286 "Interface" => VbNetTokenType::Interface,
287 "Structure" => VbNetTokenType::Structure,
288 "Enum" => VbNetTokenType::Enum,
289 "Module" => VbNetTokenType::Module,
290 "Delegate" => VbNetTokenType::Delegate,
291 "Event" => VbNetTokenType::Event,
292 "Function" => VbNetTokenType::Function,
293 "Sub" => VbNetTokenType::Sub,
294 "Property" => VbNetTokenType::Property,
295 "Dim" => VbNetTokenType::Dim,
296 "Const" => VbNetTokenType::Const,
297 "As" => VbNetTokenType::As,
298 "In" => VbNetTokenType::In,
299 "If" => VbNetTokenType::If,
300 "Then" => VbNetTokenType::Then,
301 "Else" => VbNetTokenType::Else,
302 "ElseIf" => VbNetTokenType::ElseIf,
303 "End" => VbNetTokenType::End,
304 "For" => VbNetTokenType::For,
305 "Each" => VbNetTokenType::Each,
306 "To" => VbNetTokenType::To,
307 "Step" => VbNetTokenType::Step,
308 "While" => VbNetTokenType::While,
309 "Do" => VbNetTokenType::Do,
310 "Loop" => VbNetTokenType::Loop,
311 "Until" => VbNetTokenType::Until,
312 "Select" => VbNetTokenType::Select,
313 "Case" => VbNetTokenType::Case,
314 "Default" => VbNetTokenType::Default,
315 "With" => VbNetTokenType::With,
316 "Try" => VbNetTokenType::Try,
317 "Catch" => VbNetTokenType::Catch,
318 "Finally" => VbNetTokenType::Finally,
319 "Throw" => VbNetTokenType::Throw,
320 "Exit" => VbNetTokenType::Exit,
321 "Continue" => VbNetTokenType::Continue,
322 "Return" => VbNetTokenType::Return,
323 "Me" => VbNetTokenType::Me,
324 "MyBase" => VbNetTokenType::MyBase,
325 "MyClass" => VbNetTokenType::MyClass,
326 "New" => VbNetTokenType::New,
327 "Of" => VbNetTokenType::Of,
328 "ByVal" => VbNetTokenType::ByVal,
329 "ByRef" => VbNetTokenType::ByRef,
330 "Optional" => VbNetTokenType::Optional,
331 "ParamArray" => VbNetTokenType::ParamArray,
332 "Public" => VbNetTokenType::Public,
333 "Private" => VbNetTokenType::Private,
334 "Protected" => VbNetTokenType::Protected,
335 "Friend" => VbNetTokenType::Friend,
336 "ProtectedFriend" => VbNetTokenType::ProtectedFriend,
337 "Shared" => VbNetTokenType::Shared,
338 "MustInherit" => VbNetTokenType::MustInherit,
339 "NotInheritable" => VbNetTokenType::NotInheritable,
340 "MustOverride" => VbNetTokenType::MustOverride,
341 "Overridable" => VbNetTokenType::Overridable,
342 "Overrides" => VbNetTokenType::Overrides,
343 "NotOverridable" => VbNetTokenType::NotOverridable,
344 "MustOverrideReadOnly" => VbNetTokenType::MustOverrideReadOnly,
345 "ReadOnly" => VbNetTokenType::ReadOnly,
346 "WriteOnly" => VbNetTokenType::WriteOnly,
347 "Static" => VbNetTokenType::Static,
348 "Partial" => VbNetTokenType::Partial,
349 "Async" => VbNetTokenType::Async,
350 "Await" => VbNetTokenType::Await,
351 "From" => VbNetTokenType::From,
352 "Where" => VbNetTokenType::Where,
353 "Order" => VbNetTokenType::Order,
354 "By" => VbNetTokenType::By,
355 "Group" => VbNetTokenType::Group,
356 "Join" => VbNetTokenType::Join,
357 "On" => VbNetTokenType::On,
358 "Into" => VbNetTokenType::Into,
359 "Let" => VbNetTokenType::Let,
360 "And" => VbNetTokenType::And,
361 "Or" => VbNetTokenType::Or,
362 "Not" => VbNetTokenType::Not,
363 "Xor" => VbNetTokenType::Xor,
364 "AndAlso" => VbNetTokenType::AndAlso,
365 "OrElse" => VbNetTokenType::OrElse,
366 "Is" => VbNetTokenType::Is,
367 "IsNot" => VbNetTokenType::IsNot,
368 "Like" => VbNetTokenType::Like,
369 "TypeOf" => VbNetTokenType::TypeOf,
370 "True" => VbNetTokenType::BooleanLiteral,
371 "False" => VbNetTokenType::BooleanLiteral,
372 "Nothing" => VbNetTokenType::NothingLiteral,
373 _ => VbNetTokenType::Identifier,
374 };
375
376 state.add_token(token_type, start_pos, state.get_position());
377 true
378 }
379 else {
380 false
381 }
382 }
383 else {
384 false
385 }
386 }
387
388 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
392 let start_pos = state.get_position();
393
394 if let Some(ch) = state.peek() {
395 let token_type = match ch {
396 '+' => {
397 state.advance(1);
398 VbNetTokenType::Plus
399 }
400 '-' => {
401 state.advance(1);
402 VbNetTokenType::Minus
403 }
404 '*' => {
405 state.advance(1);
406 VbNetTokenType::Star
407 }
408 '/' => {
409 state.advance(1);
410 VbNetTokenType::Slash
411 }
412 '\\' => {
413 state.advance(1);
414 VbNetTokenType::Backslash
415 }
416 '%' => {
417 state.advance(1);
418 VbNetTokenType::Percent
419 }
420 '^' => {
421 state.advance(1);
422 VbNetTokenType::Caret
423 }
424 '=' => {
425 state.advance(1);
426 VbNetTokenType::Equal
427 }
428 '<' => {
429 state.advance(1);
430 if let Some('>') = state.peek() {
431 state.advance(1);
432 VbNetTokenType::NotEqual
433 }
434 else if let Some('=') = state.peek() {
435 state.advance(1);
436 VbNetTokenType::LessEqual
437 }
438 else {
439 VbNetTokenType::LessThan
440 }
441 }
442 '>' => {
443 state.advance(1);
444 if let Some('=') = state.peek() {
445 state.advance(1);
446 VbNetTokenType::GreaterEqual
447 }
448 else {
449 VbNetTokenType::GreaterThan
450 }
451 }
452 '&' => {
453 state.advance(1);
454 VbNetTokenType::Ampersand
455 }
456 '!' => {
457 state.advance(1);
458 VbNetTokenType::Exclamation
459 }
460 _ => return false,
461 };
462
463 state.add_token(token_type, start_pos, state.get_position());
464 true
465 }
466 else {
467 false
468 }
469 }
470
471 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
479 let start_pos = state.get_position();
480
481 if let Some(ch) = state.peek() {
482 let token_type = match ch {
483 '(' => VbNetTokenType::LeftParen,
484 ')' => VbNetTokenType::RightParen,
485 '[' => VbNetTokenType::LeftBracket,
486 ']' => VbNetTokenType::RightBracket,
487 '{' => VbNetTokenType::LeftBrace,
488 '}' => VbNetTokenType::RightBrace,
489 ';' => VbNetTokenType::Semicolon,
490 ',' => VbNetTokenType::Comma,
491 '.' => VbNetTokenType::Dot,
492 ':' => VbNetTokenType::Colon,
493 '#' => VbNetTokenType::Hash,
494 _ => return false,
495 };
496
497 state.advance(ch.len_utf8());
498 state.add_token(token_type, start_pos, state.get_position());
499 true
500 }
501 else {
502 false
503 }
504 }
505
506 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
512 while state.not_at_end() {
513 let safe_point = state.get_position();
514
515 if self.skip_whitespace(state) {
516 continue;
517 }
518
519 if self.lex_newline(state) {
520 continue;
521 }
522
523 if self.lex_comment(state) {
524 continue;
525 }
526
527 if self.lex_string(state) {
528 continue;
529 }
530
531 if self.lex_char(state) {
532 continue;
533 }
534
535 if self.lex_date(state) {
536 continue;
537 }
538
539 if self.lex_number(state) {
540 continue;
541 }
542
543 if self.lex_keyword_or_identifier(state) {
544 continue;
545 }
546
547 if self.lex_operator(state) {
548 continue;
549 }
550
551 if self.lex_delimiter(state) {
552 continue;
553 }
554
555 let start_pos = state.get_position();
557 if let Some(ch) = state.peek() {
558 state.advance(ch.len_utf8());
559 state.add_token(VbNetTokenType::Error, start_pos, state.get_position());
560 }
561
562 state.advance_if_dead_lock(safe_point)
563 }
564 Ok(())
565 }
566}
567
impl<'config> Lexer<VbNetLanguage> for VbNetLexer<'config> {
    /// Runs the full lexer over `text` and returns the token output.
    ///
    /// Edits are currently ignored (no incremental relexing). The EOF token
    /// is appended only when the run succeeded; the result is then finalized
    /// through the provided cache.
    fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], mut cache: &'a mut impl LexerCache<VbNetLanguage>) -> LexOutput<VbNetLanguage> {
        let mut state = LexerState::new(text);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, &mut cache)
    }
}
577}