1use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9 Number(f64),
11 String(String),
12 TemplateLiteral(String),
13 Boolean(bool),
14 Null,
15 Undefined,
16
17 Identifier(String),
19 Keyword(Keyword),
20
21 Plus,
23 Minus,
24 Multiply,
25 Divide,
26 Modulo,
27 Equal,
28 NotEqual,
29 StrictEqual,
30 StrictNotEqual,
31 LessThan,
32 GreaterThan,
33 LessEqual,
34 GreaterEqual,
35 And,
36 Or,
37 Not,
38 Assign,
39 Arrow, PlusAssign,
41 MinusAssign,
42 MultiplyAssign,
43 DivideAssign,
44 Union, Intersection, LeftParen,
49 RightParen,
50 LeftBrace,
51 RightBrace,
52 LeftBracket,
53 RightBracket,
54 Semicolon,
55 Comma,
56 Dot,
57 Colon,
58 QuestionMark,
59 At,
60 RegExp(String, String), TypeAnnotation,
64 GenericStart,
65 GenericEnd,
66
67 Newline,
69 Whitespace,
70 Comment(String),
71 EOF,
72}
73
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
76pub enum Keyword {
77 Let,
79 Const,
80 Var,
81 Function,
82 Class,
83 Interface,
84 Type,
85 Enum,
86 Namespace,
87 Module,
88 Import,
89 Export,
90 From,
91 As,
92 Default,
93
94 If,
96 Else,
97 Switch,
98 Case,
99 DefaultCase,
100 For,
101 While,
102 Do,
103 Break,
104 Continue,
105 Return,
106 Throw,
107 Try,
108 Catch,
109 Finally,
110
111 Extends,
113 Implements,
114 Super,
115 This,
116 New,
117 Static,
118 Public,
119 Private,
120 Protected,
121 Abstract,
122 Readonly,
123 Get,
124 Set,
125 Constructor,
126
127 Async,
129 Await,
130 Promise,
131
132 Any,
134 Unknown,
135 Never,
136 Void,
137 Null,
138 Undefined,
139 Boolean,
140 Number,
141 String,
142 Symbol,
143 BigInt,
144 Object,
145 Array,
146 Tuple,
147 Union,
148 Intersection,
149 Literal,
150 Mapped,
151 Conditional,
152 Template,
153
154 Partial,
156 Required,
157 Pick,
158 Omit,
159 Record,
160 Exclude,
161 Extract,
162 NonNullable,
163 Parameters,
164 ReturnType,
165 InstanceType,
166 ThisParameterType,
167 OmitThisParameter,
168 ThisType,
169
170 True,
172 False,
173 In,
174 Of,
175 Instanceof,
176 Typeof,
177 Keyof,
178 Key,
179 Is,
180 Asserts,
181 Infer,
182 Declare,
183 Ambient,
184 Global,
185}
186
/// Turns source text into a flat list of tokens.
///
/// Build one with `Lexer::new` and call `tokenize`.
pub struct Lexer {
    /// The complete source text being scanned.
    input: String,
    /// Cursor into `input` marking the next unread character.
    position: usize,
    /// 1-based line of the cursor; incremented each time a `'\n'` is consumed.
    line: usize,
    /// 1-based column of the cursor; reset to 1 after each `'\n'`.
    column: usize,
}
194
195impl Lexer {
196 pub fn new(input: String) -> Self {
198 Self {
199 input,
200 position: 0,
201 line: 1,
202 column: 1,
203 }
204 }
205
206 pub fn new_utf8(input: String) -> Self {
208 Self::new(input)
209 }
210
211 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
213 let mut tokens = Vec::new();
214
215 while self.position < self.input.len() {
216 match self.next_token()? {
217 Some(token) => {
218 tokens.push(token);
219 }
220 None => break,
221 }
222 }
223
224 tokens.push(Token::EOF);
225 Ok(tokens)
226 }
227
228 fn next_token(&mut self) -> Result<Option<Token>> {
230 self.skip_whitespace();
231
232 if self.position >= self.input.len() {
233 return Ok(None);
234 }
235
236 let ch = self.current_char();
237 let token = match ch {
238 '+' => {
239 if self.peek_char() == Some('=') {
240 self.advance();
241 Ok(Some(Token::PlusAssign))
242 } else if self.peek_char() == Some('+') {
243 self.advance();
244 Ok(Some(Token::Plus)) } else {
246 Ok(Some(Token::Plus))
247 }
248 }
249 '-' => {
250 if self.peek_char() == Some('=') {
251 self.advance();
252 Ok(Some(Token::MinusAssign))
253 } else if self.peek_char() == Some('>') {
254 self.advance();
255 Ok(Some(Token::Arrow))
256 } else {
257 Ok(Some(Token::Minus))
258 }
259 }
260 '*' => {
261 if self.peek_char() == Some('=') {
262 self.advance();
263 Ok(Some(Token::MultiplyAssign))
264 } else {
265 Ok(Some(Token::Multiply))
266 }
267 }
268 '/' => {
269 if self.peek_char() == Some('=') {
270 self.advance();
271 Ok(Some(Token::DivideAssign))
272 } else if self.peek_char() == Some('/') {
273 self.advance();
274 self.skip_line_comment();
275 Ok(None)
276 } else if self.peek_char() == Some('*') {
277 self.advance();
278 self.skip_block_comment();
279 Ok(None)
280 } else {
281 Ok(Some(Token::Divide))
282 }
283 }
284 '%' => Ok(Some(Token::Modulo)),
285 '=' => {
286 if self.peek_char() == Some('=') {
287 self.advance();
288 if self.peek_char() == Some('=') {
289 self.advance();
290 Ok(Some(Token::StrictEqual))
291 } else {
292 Ok(Some(Token::Equal))
293 }
294 } else if self.peek_char() == Some('>') {
295 self.advance();
296 Ok(Some(Token::Arrow))
297 } else {
298 Ok(Some(Token::Assign))
299 }
300 }
301 '!' => {
302 if self.peek_char() == Some('=') {
303 self.advance();
304 if self.peek_char() == Some('=') {
305 self.advance();
306 Ok(Some(Token::StrictNotEqual))
307 } else {
308 Ok(Some(Token::NotEqual))
309 }
310 } else {
311 Ok(Some(Token::Not))
312 }
313 }
314 '<' => {
315 if self.peek_char() == Some('=') {
316 self.advance();
317 Ok(Some(Token::LessEqual))
318 } else {
319 Ok(Some(Token::LessThan))
320 }
321 }
322 '>' => {
323 if self.peek_char() == Some('=') {
324 self.advance();
325 Ok(Some(Token::GreaterEqual))
326 } else {
327 Ok(Some(Token::GreaterThan))
328 }
329 }
330 '&' => {
331 if self.peek_char() == Some('&') {
332 self.advance();
333 Ok(Some(Token::And))
334 } else {
335 Ok(Some(Token::Intersection))
336 }
337 }
338 '|' => {
339 if self.peek_char() == Some('|') {
340 self.advance();
341 Ok(Some(Token::Or))
342 } else {
343 Ok(Some(Token::Union))
344 }
345 }
346 '(' => Ok(Some(Token::LeftParen)),
347 ')' => Ok(Some(Token::RightParen)),
348 '{' => Ok(Some(Token::LeftBrace)),
349 '}' => Ok(Some(Token::RightBrace)),
350 '[' => Ok(Some(Token::LeftBracket)),
351 ']' => Ok(Some(Token::RightBracket)),
352 ';' => Ok(Some(Token::Semicolon)),
353 ',' => Ok(Some(Token::Comma)),
354 '.' => Ok(Some(Token::Dot)),
355 ':' => Ok(Some(Token::Colon)),
356 '?' => Ok(Some(Token::QuestionMark)),
357 '@' => Ok(Some(Token::At)), '"' | '\'' => Ok(self.parse_string()?),
359 '`' => Ok(self.parse_template_literal()?),
360 '0'..='9' => Ok(self.parse_number()?),
361 'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
362 _ if ch.is_alphabetic() || ch.is_alphanumeric() => Ok(self.parse_identifier_or_keyword()?),
363 _ => {
364 return Err(CompilerError::parse_error(
365 self.line,
366 self.column,
367 format!("Unexpected character: {}", ch),
368 ));
369 }
370 };
371
372 match ch {
374 'a'..='z' | 'A'..='Z' | '_' | '$' => {
375 }
377 '0'..='9' => {
378 }
380 '"' | '\'' => {
381 }
383 _ if ch.is_alphabetic() || ch.is_alphanumeric() => {
384 }
386 _ => {
387 self.advance();
389 }
390 }
391 token
392 }
393
394 fn current_char(&self) -> char {
396 self.input.chars().nth(self.position).unwrap_or('\0')
397 }
398
399
400 fn peek_char(&self) -> Option<char> {
402 self.input.chars().nth(self.position + 1)
403 }
404
405 fn advance(&mut self) {
407 if self.position < self.input.len() {
408 let ch = self.current_char();
409 if ch == '\n' {
410 self.line += 1;
411 self.column = 1;
412 } else {
413 self.column += 1;
414 }
415
416 self.position += 1;
418 }
419 }
420
421 fn skip_whitespace(&mut self) {
423 while self.position < self.input.len() {
424 let ch = self.current_char();
425 if ch.is_whitespace() {
426 self.advance();
427 } else if ch == '/' && self.peek_char() == Some('/') {
428 self.advance(); self.advance(); while self.position < self.input.len() && self.current_char() != '\n' {
432 self.advance();
433 }
434 } else if ch == '/' && self.peek_char() == Some('*') {
435 self.advance(); self.advance(); while self.position < self.input.len() {
439 if self.current_char() == '*' && self.peek_char() == Some('/') {
440 self.advance(); self.advance(); break;
443 }
444 self.advance();
445 }
446 } else {
447 break;
448 }
449 }
450 }
451
452 fn skip_line_comment(&mut self) -> Option<Token> {
454 while self.position < self.input.len() && self.current_char() != '\n' {
455 self.advance();
456 }
457 None
458 }
459
460 fn skip_block_comment(&mut self) -> Option<Token> {
462 while self.position < self.input.len() {
463 if self.current_char() == '*' && self.peek_char() == Some('/') {
464 self.advance();
465 self.advance();
466 break;
467 }
468 self.advance();
469 }
470 None
471 }
472
473 fn parse_string(&mut self) -> Result<Option<Token>> {
475 let quote = self.current_char();
476 let mut value = String::new();
477 self.advance();
478
479 while self.position < self.input.len() {
480 let ch = self.current_char();
481 if ch == quote {
482 self.advance();
483 return Ok(Some(Token::String(value)));
484 } else if ch == '\\' {
485 self.advance();
486 if self.position < self.input.len() {
487 let escaped = self.current_char();
488 value.push(match escaped {
489 'n' => '\n',
490 't' => '\t',
491 'r' => '\r',
492 '\\' => '\\',
493 '"' => '"',
494 '\'' => '\'',
495 _ => escaped,
496 });
497 self.advance();
498 }
499 } else {
500 value.push(ch);
501 self.advance();
502 }
503 }
504
505 Err(CompilerError::parse_error(
506 self.line,
507 self.column,
508 "Unterminated string literal",
509 ))
510 }
511
512 fn parse_template_literal(&mut self) -> Result<Option<Token>> {
514 let mut value = String::new();
515 self.advance(); while self.position < self.input.len() {
518 let ch = self.current_char();
519 if ch == '`' {
520 self.advance();
521 return Ok(Some(Token::TemplateLiteral(value)));
522 } else if ch == '\\' {
523 self.advance();
524 if self.position < self.input.len() {
525 let escaped = self.current_char();
526 value.push(match escaped {
527 'n' => '\n',
528 't' => '\t',
529 'r' => '\r',
530 '\\' => '\\',
531 '`' => '`',
532 '$' => '$',
533 _ => escaped,
534 });
535 self.advance();
536 }
537 } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
538 value.push('$');
540 self.advance();
541 if self.position < self.input.len() {
542 value.push('{');
543 self.advance();
544 while self.position < self.input.len() && self.current_char() != '}' {
546 value.push(self.current_char());
547 self.advance();
548 }
549 if self.position < self.input.len() {
550 value.push('}');
551 self.advance();
552 }
553 }
554 } else {
555 value.push(ch);
556 self.advance();
557 }
558 }
559
560 Err(CompilerError::parse_error(
561 self.line,
562 self.column,
563 "Unterminated template literal",
564 ))
565 }
566
567 fn parse_number(&mut self) -> Result<Option<Token>> {
569 let mut value = String::new();
570 let mut has_dot = false;
571
572 while self.position < self.input.len() {
573 let ch = self.current_char();
574 if ch.is_ascii_digit() {
575 value.push(ch);
576 self.advance();
577 } else if ch == '.' && !has_dot {
578 has_dot = true;
579 value.push(ch);
580 self.advance();
581 } else {
582 break;
583 }
584 }
585
586 let number: f64 = value.parse().map_err(|_| {
587 CompilerError::parse_error(self.line, self.column, "Invalid number literal")
588 })?;
589
590 Ok(Some(Token::Number(number)))
591 }
592
593 fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
595 let mut value = String::new();
596
597 while self.position < self.input.len() {
598 let ch = self.current_char();
599 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
600 value.push(ch);
601 self.advance();
602 } else {
603 break;
604 }
605 }
606
607 if value == "true" {
609 Ok(Some(Token::Boolean(true)))
610 } else if value == "false" {
611 Ok(Some(Token::Boolean(false)))
612 } else if let Some(keyword) = self.parse_keyword(&value) {
613 Ok(Some(Token::Keyword(keyword)))
614 } else {
615 Ok(Some(Token::Identifier(value)))
616 }
617 }
618
619 fn parse_keyword(&self, value: &str) -> Option<Keyword> {
621 match value {
622 "let" => Some(Keyword::Let),
623 "const" => Some(Keyword::Const),
624 "var" => Some(Keyword::Var),
625 "function" => Some(Keyword::Function),
626 "class" => Some(Keyword::Class),
627 "interface" => Some(Keyword::Interface),
628 "type" => Some(Keyword::Type),
629 "enum" => Some(Keyword::Enum),
630 "namespace" => Some(Keyword::Namespace),
631 "module" => Some(Keyword::Module),
632 "import" => Some(Keyword::Import),
633 "export" => Some(Keyword::Export),
634 "from" => Some(Keyword::From),
635 "as" => Some(Keyword::As),
636 "default" => Some(Keyword::Default),
637 "if" => Some(Keyword::If),
638 "else" => Some(Keyword::Else),
639 "switch" => Some(Keyword::Switch),
640 "case" => Some(Keyword::Case),
641 "for" => Some(Keyword::For),
642 "while" => Some(Keyword::While),
643 "do" => Some(Keyword::Do),
644 "break" => Some(Keyword::Break),
645 "continue" => Some(Keyword::Continue),
646 "return" => Some(Keyword::Return),
647 "throw" => Some(Keyword::Throw),
648 "try" => Some(Keyword::Try),
649 "catch" => Some(Keyword::Catch),
650 "finally" => Some(Keyword::Finally),
651 "extends" => Some(Keyword::Extends),
652 "implements" => Some(Keyword::Implements),
653 "super" => Some(Keyword::Super),
654 "this" => Some(Keyword::This),
655 "new" => Some(Keyword::New),
656 "static" => Some(Keyword::Static),
657 "public" => Some(Keyword::Public),
658 "private" => Some(Keyword::Private),
659 "protected" => Some(Keyword::Protected),
660 "abstract" => Some(Keyword::Abstract),
661 "readonly" => Some(Keyword::Readonly),
662 "get" => Some(Keyword::Get),
663 "set" => Some(Keyword::Set),
664 "async" => Some(Keyword::Async),
665 "await" => Some(Keyword::Await),
666 "Promise" => Some(Keyword::Promise),
667 "any" => Some(Keyword::Any),
668 "unknown" => Some(Keyword::Unknown),
669 "never" => Some(Keyword::Never),
670 "void" => Some(Keyword::Void),
671 "null" => Some(Keyword::Null),
672 "undefined" => Some(Keyword::Undefined),
673 "boolean" => Some(Keyword::Boolean),
674 "number" => Some(Keyword::Number),
675 "string" => Some(Keyword::String),
676 "object" => Some(Keyword::Object),
677 "Array" => Some(Keyword::Array),
678 "true" => Some(Keyword::True),
679 "false" => Some(Keyword::False),
680 "in" => Some(Keyword::In),
681 "of" => Some(Keyword::Of),
682 "instanceof" => Some(Keyword::Instanceof),
683 "typeof" => Some(Keyword::Typeof),
684 "keyof" => Some(Keyword::Keyof),
685 "key" => Some(Keyword::Key),
686 "is" => Some(Keyword::Is),
687 "asserts" => Some(Keyword::Asserts),
688 "infer" => Some(Keyword::Infer),
689 "declare" => Some(Keyword::Declare),
690 "global" => Some(Keyword::Global),
691 _ => None,
692 }
693 }
694
695 #[allow(dead_code)]
697 fn parse_regex(&mut self) -> Result<Option<Token>> {
698 let mut pattern = String::new();
699 let mut flags = String::new();
700
701 self.advance(); while self.position < self.input.len() {
705 let ch = self.current_char();
706 if ch == '/' {
707 self.advance();
708 break;
709 } else if ch == '\\' {
710 pattern.push(ch);
712 self.advance();
713 if self.position < self.input.len() {
714 pattern.push(self.current_char());
715 self.advance();
716 }
717 } else {
718 pattern.push(ch);
719 self.advance();
720 }
721 }
722
723 while self.position < self.input.len() {
725 let ch = self.current_char();
726 if ch.is_alphabetic() {
727 flags.push(ch);
728 self.advance();
729 } else {
730 break;
731 }
732 }
733
734 Ok(Some(Token::RegExp(pattern, flags)))
735 }
736}