1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::VLangLanguage, lexer::token_type::VLangTokenType};
6use oak_core::{
7 Lexer, LexerState,
8 lexer::{LexOutput, LexerCache},
9 source::Source,
10};
11
/// Shorthand for the `oak_core` lexer state specialised to [`VLangLanguage`],
/// generic over the source type `S` and borrowing it for `'a`.
pub(crate) type State<'a, S> = LexerState<'a, S, VLangLanguage>;
13
/// Lexer that splits V source text into `VLangTokenType` tokens.
///
/// The lexer itself is stateless between runs; it only borrows the language
/// configuration it was created with.
#[derive(Clone, Debug)]
pub struct VLangLexer<'config> {
    /// Borrowed language configuration. None of the lexing routines in this
    /// file read it; it is stored for the lifetime of the lexer.
    config: &'config VLangLanguage,
}
19
20impl<'config> VLangLexer<'config> {
21 pub fn new(config: &'config VLangLanguage) -> Self {
23 Self { config }
24 }
25
26 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
28 let start_pos = state.get_position();
29
30 while let Some(ch) = state.peek() {
31 if ch == ' ' || ch == '\t' {
32 state.advance(ch.len_utf8());
33 }
34 else {
35 break;
36 }
37 }
38
39 if state.get_position() > start_pos {
40 state.add_token(VLangTokenType::Whitespace, start_pos, state.get_position());
41 true
42 }
43 else {
44 false
45 }
46 }
47
48 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
50 let start_pos = state.get_position();
51
52 if let Some('\n') = state.peek() {
53 state.advance(1);
54 state.add_token(VLangTokenType::Newline, start_pos, state.get_position());
55 true
56 }
57 else if let Some('\r') = state.peek() {
58 state.advance(1);
59 if let Some('\n') = state.peek() {
60 state.advance(1);
61 }
62 state.add_token(VLangTokenType::Newline, start_pos, state.get_position());
63 true
64 }
65 else {
66 false
67 }
68 }
69
70 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
72 let start_pos = state.get_position();
73
74 if let Some('/') = state.peek() {
76 if let Some('/') = state.peek_next_n(1) {
77 state.advance(2);
78
79 while let Some(ch) = state.peek() {
81 if ch == '\n' || ch == '\r' {
82 break;
83 }
84 state.advance(ch.len_utf8());
85 }
86
87 state.add_token(VLangTokenType::Comment, start_pos, state.get_position());
88 return true;
89 }
90 else if let Some('*') = state.peek_next_n(1) {
92 state.advance(2);
93
94 while let Some(ch) = state.peek() {
95 if ch == '*' {
96 if let Some('/') = state.peek_next_n(1) {
97 state.advance(2);
98 break;
99 }
100 }
101 state.advance(ch.len_utf8());
102 }
103
104 state.add_token(VLangTokenType::Comment, start_pos, state.get_position());
105 return true;
106 }
107 }
108 false
109 }
110
111 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
113 let start_pos = state.get_position();
114
115 if let Some(quote) = state.peek() {
116 if quote == '"' || quote == '\'' {
117 state.advance(1);
118 let mut escaped = false;
119
120 while let Some(ch) = state.peek() {
121 if escaped {
122 escaped = false;
123 state.advance(ch.len_utf8());
124 }
125 else if ch == '\\' {
126 escaped = true;
127 state.advance(1);
128 }
129 else if ch == quote {
130 state.advance(1);
131 break;
132 }
133 else if ch == '\n' || ch == '\r' {
134 break; }
136 else {
137 state.advance(ch.len_utf8());
138 }
139 }
140
141 let token_kind = if quote == '"' { VLangTokenType::StringLiteral } else { VLangTokenType::CharLiteral };
142 state.add_token(token_kind, start_pos, state.get_position());
143 true
144 }
145 else {
146 false
147 }
148 }
149 else {
150 false
151 }
152 }
153
154 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
156 let start_pos = state.get_position();
157
158 if let Some(ch) = state.peek() {
159 if ch.is_ascii_digit() {
160 while let Some(digit) = state.peek() {
162 if digit.is_ascii_digit() {
163 state.advance(1);
164 }
165 else {
166 break;
167 }
168 }
169
170 let mut is_float = false;
172 if let Some('.') = state.peek() {
173 if let Some(next_ch) = state.peek_next_n(1) {
174 if next_ch.is_ascii_digit() {
175 is_float = true;
176 state.advance(1); while let Some(digit) = state.peek() {
180 if digit.is_ascii_digit() {
181 state.advance(1);
182 }
183 else {
184 break;
185 }
186 }
187 }
188 }
189 }
190
191 if let Some(e) = state.peek() {
193 if e == 'e' || e == 'E' {
194 let exp_start = state.get_position();
195 state.advance(1);
196
197 if let Some(sign) = state.peek() {
199 if sign == '+' || sign == '-' {
200 state.advance(1);
201 }
202 }
203
204 let mut has_exp_digits = false;
206 while let Some(digit) = state.peek() {
207 if digit.is_ascii_digit() {
208 has_exp_digits = true;
209 state.advance(1);
210 }
211 else {
212 break;
213 }
214 }
215
216 if has_exp_digits {
217 is_float = true;
218 }
219 else {
220 state.set_position(exp_start);
222 }
223 }
224 }
225
226 let token_kind = if is_float { VLangTokenType::FloatLiteral } else { VLangTokenType::IntegerLiteral };
227 state.add_token(token_kind, start_pos, state.get_position());
228 true
229 }
230 else {
231 false
232 }
233 }
234 else {
235 false
236 }
237 }
238
239 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
241 let start_pos = state.get_position();
242
243 if let Some(ch) = state.peek() {
244 if ch.is_ascii_alphabetic() || ch == '_' {
245 while let Some(ch) = state.peek() {
246 if ch.is_ascii_alphanumeric() || ch == '_' {
247 state.advance(ch.len_utf8());
248 }
249 else {
250 break;
251 }
252 }
253
254 let text = state.get_text_in((start_pos..state.get_position()).into());
255 let token_kind = match text.as_ref() {
256 "module" => VLangTokenType::ModuleKw,
257 "import" => VLangTokenType::ImportKw,
258 "pub" => VLangTokenType::PubKw,
259 "fn" => VLangTokenType::FnKw,
260 "struct" => VLangTokenType::StructKw,
261 "interface" => VLangTokenType::InterfaceKw,
262 "enum" => VLangTokenType::EnumKw,
263 "type" => VLangTokenType::TypeKw,
264 "const" => VLangTokenType::ConstKw,
265 "mut" => VLangTokenType::MutKw,
266 "shared" => VLangTokenType::SharedKw,
267 "volatile" => VLangTokenType::VolatileKw,
268 "unsafe" => VLangTokenType::UnsafeKw,
269 "if" => VLangTokenType::IfKw,
270 "else" => VLangTokenType::ElseKw,
271 "for" => VLangTokenType::ForKw,
272 "in" => VLangTokenType::InKw,
273 "match" => VLangTokenType::MatchKw,
274 "or" => VLangTokenType::OrKw,
275 "return" => VLangTokenType::ReturnKw,
276 "break" => VLangTokenType::BreakKw,
277 "continue" => VLangTokenType::ContinueKw,
278 "goto" => VLangTokenType::GotoKw,
279 "defer" => VLangTokenType::DeferKw,
280 "go" => VLangTokenType::GoKw,
281 "select" => VLangTokenType::SelectKw,
282 "lock" => VLangTokenType::LockKw,
283 "rlock" => VLangTokenType::RlockKw,
284 "as" => VLangTokenType::AsKw,
285 "is" => VLangTokenType::IsKw,
286 "sizeof" => VLangTokenType::SizeofKw,
287 "typeof" => VLangTokenType::TypeofKw,
288 "offsetof" => VLangTokenType::OffsetofKw,
289 "assert" => VLangTokenType::AssertKw,
290 "panic" => VLangTokenType::PanicKw,
291 "eprintln" => VLangTokenType::EprintlnKw,
292 "println" => VLangTokenType::PrintlnKw,
293 "print" => VLangTokenType::PrintKw,
294 "eprint" => VLangTokenType::EprintKw,
295 "bool" => VLangTokenType::BoolKw,
296 "i8" => VLangTokenType::I8Kw,
297 "i16" => VLangTokenType::I16Kw,
298 "i32" => VLangTokenType::I32Kw,
299 "i64" => VLangTokenType::I64Kw,
300 "u8" => VLangTokenType::U8Kw,
301 "u16" => VLangTokenType::U16Kw,
302 "u32" => VLangTokenType::U32Kw,
303 "u64" => VLangTokenType::U64Kw,
304 "int" => VLangTokenType::IntKw,
305 "uint" => VLangTokenType::UintKw,
306 "f32" => VLangTokenType::F32Kw,
307 "f64" => VLangTokenType::F64Kw,
308 "string" => VLangTokenType::StringKw,
309 "rune" => VLangTokenType::RuneKw,
310 "byte" => VLangTokenType::ByteKw,
311 "voidptr" => VLangTokenType::VoidptrKw,
312 "char" => VLangTokenType::CharKw,
313 "true" | "false" => VLangTokenType::BoolLiteral,
314 _ => VLangTokenType::Identifier,
315 };
316
317 state.add_token(token_kind, start_pos, state.get_position());
318 true
319 }
320 else {
321 false
322 }
323 }
324 else {
325 false
326 }
327 }
328
329 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
331 let start_pos = state.get_position();
332
333 if let Some(ch) = state.peek() {
334 let token_kind = match ch {
335 '+' => {
336 if let Some('=') = state.peek_next_n(1) {
337 state.advance(2);
338 VLangTokenType::PlusEq
339 }
340 else if let Some('+') = state.peek_next_n(1) {
341 state.advance(2);
342 VLangTokenType::PlusPlus
343 }
344 else {
345 state.advance(1);
346 VLangTokenType::Plus
347 }
348 }
349 '-' => {
350 if let Some('=') = state.peek_next_n(1) {
351 state.advance(2);
352 VLangTokenType::MinusEq
353 }
354 else if let Some('-') = state.peek_next_n(1) {
355 state.advance(2);
356 VLangTokenType::MinusMinus
357 }
358 else if let Some('>') = state.peek_next_n(1) {
359 state.advance(2);
360 VLangTokenType::Arrow
361 }
362 else {
363 state.advance(1);
364 VLangTokenType::Minus
365 }
366 }
367 '*' => {
368 if let Some('=') = state.peek_next_n(1) {
369 state.advance(2);
370 VLangTokenType::StarEq
371 }
372 else {
373 state.advance(1);
374 VLangTokenType::Star
375 }
376 }
377 '/' => {
378 if let Some('=') = state.peek_next_n(1) {
379 state.advance(2);
380 VLangTokenType::SlashEq
381 }
382 else {
383 state.advance(1);
384 VLangTokenType::Slash
385 }
386 }
387 '%' => {
388 if let Some('=') = state.peek_next_n(1) {
389 state.advance(2);
390 VLangTokenType::PercentEq
391 }
392 else {
393 state.advance(1);
394 VLangTokenType::Percent
395 }
396 }
397 '&' => {
398 if let Some('=') = state.peek_next_n(1) {
399 state.advance(2);
400 VLangTokenType::AmpersandEq
401 }
402 else if let Some('&') = state.peek_next_n(1) {
403 state.advance(2);
404 VLangTokenType::AndAnd
405 }
406 else {
407 state.advance(1);
408 VLangTokenType::Ampersand
409 }
410 }
411 '|' => {
412 if let Some('=') = state.peek_next_n(1) {
413 state.advance(2);
414 VLangTokenType::PipeEq
415 }
416 else if let Some('|') = state.peek_next_n(1) {
417 state.advance(2);
418 VLangTokenType::OrOr
419 }
420 else {
421 state.advance(1);
422 VLangTokenType::Pipe
423 }
424 }
425 '^' => {
426 if let Some('=') = state.peek_next_n(1) {
427 state.advance(2);
428 VLangTokenType::CaretEq
429 }
430 else {
431 state.advance(1);
432 VLangTokenType::Caret
433 }
434 }
435 '=' => {
436 if let Some('=') = state.peek_next_n(1) {
437 state.advance(2);
438 VLangTokenType::EqEq
439 }
440 else if let Some('>') = state.peek_next_n(1) {
441 state.advance(2);
442 VLangTokenType::FatArrow
443 }
444 else {
445 state.advance(1);
446 VLangTokenType::Eq
447 }
448 }
449 '!' => {
450 if let Some('=') = state.peek_next_n(1) {
451 state.advance(2);
452 VLangTokenType::Ne
453 }
454 else {
455 state.advance(1);
456 VLangTokenType::Bang
457 }
458 }
459 '<' => {
460 if let Some('=') = state.peek_next_n(1) {
461 state.advance(2);
462 VLangTokenType::Le
463 }
464 else if let Some('<') = state.peek_next_n(1) {
465 if let Some('=') = state.peek_next_n(2) {
466 state.advance(3);
467 VLangTokenType::LeftShiftEq
468 }
469 else {
470 state.advance(2);
471 VLangTokenType::LeftShift
472 }
473 }
474 else {
475 state.advance(1);
476 VLangTokenType::LessThan
477 }
478 }
479 '>' => {
480 if let Some('=') = state.peek_next_n(1) {
481 state.advance(2);
482 VLangTokenType::Ge
483 }
484 else if let Some('>') = state.peek_next_n(1) {
485 if let Some('=') = state.peek_next_n(2) {
486 state.advance(3);
487 VLangTokenType::RightShiftEq
488 }
489 else {
490 state.advance(2);
491 VLangTokenType::RightShift
492 }
493 }
494 else {
495 state.advance(1);
496 VLangTokenType::GreaterThan
497 }
498 }
499 '.' => {
500 if let Some('.') = state.peek_next_n(1) {
501 if let Some('.') = state.peek_next_n(2) {
502 state.advance(3);
503 VLangTokenType::DotDotDot
504 }
505 else {
506 state.advance(2);
507 VLangTokenType::DotDot
508 }
509 }
510 else {
511 state.advance(1);
512 VLangTokenType::Dot
513 }
514 }
515 ',' => {
516 state.advance(1);
517 VLangTokenType::Comma
518 }
519 ':' => {
520 state.advance(1);
521 VLangTokenType::Colon
522 }
523 ';' => {
524 state.advance(1);
525 VLangTokenType::Semicolon
526 }
527 '(' => {
528 state.advance(1);
529 VLangTokenType::LeftParen
530 }
531 ')' => {
532 state.advance(1);
533 VLangTokenType::RightParen
534 }
535 '[' => {
536 state.advance(1);
537 VLangTokenType::LeftBracket
538 }
539 ']' => {
540 state.advance(1);
541 VLangTokenType::RightBracket
542 }
543 '{' => {
544 state.advance(1);
545 VLangTokenType::LeftBrace
546 }
547 '}' => {
548 state.advance(1);
549 VLangTokenType::RightBrace
550 }
551 '?' => {
552 state.advance(1);
553 VLangTokenType::Question
554 }
555 '~' => {
556 state.advance(1);
557 VLangTokenType::Tilde
558 }
559 _ => {
560 state.advance(ch.len_utf8());
561 VLangTokenType::Error
562 }
563 };
564
565 state.add_token(token_kind, start_pos, state.get_position());
566 true
567 }
568 else {
569 false
570 }
571 }
572}
573
574impl<'config> Lexer<VLangLanguage> for VLangLexer<'config> {
575 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<VLangLanguage>) -> LexOutput<VLangLanguage> {
576 let mut state = State::new_with_cache(source, 0, cache);
577
578 while let Some(_ch) = state.peek() {
579 if self.skip_whitespace(&mut state) {
580 continue;
581 }
582
583 if self.lex_newline(&mut state) {
584 continue;
585 }
586
587 if self.lex_comment(&mut state) {
588 continue;
589 }
590
591 if self.lex_string(&mut state) {
592 continue;
593 }
594
595 if self.lex_number(&mut state) {
596 continue;
597 }
598
599 if self.lex_identifier_or_keyword(&mut state) {
600 continue;
601 }
602
603 if self.lex_operator(&mut state) {
604 continue;
605 }
606
607 let start_pos = state.get_position();
609 if let Some(ch) = state.peek() {
610 state.advance(ch.len_utf8());
611 state.add_token(VLangTokenType::Error, start_pos, state.get_position());
612 }
613 }
614
615 state.add_eof();
616 state.finish_with_cache(Ok(()), cache)
617 }
618}