1use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9 Number(f64),
11 String(String),
12 TemplateLiteral(String),
13 Boolean(bool),
14 Null,
15 Undefined,
16
17 Identifier(String),
19 Keyword(Keyword),
20
21 Plus,
23 Minus,
24 Multiply,
25 Divide,
26 Modulo,
27 Equal,
28 NotEqual,
29 StrictEqual,
30 StrictNotEqual,
31 LessThan,
32 GreaterThan,
33 LessEqual,
34 GreaterEqual,
35 And,
36 Or,
37 Not,
38 Assign,
39 Arrow, PlusAssign,
41 MinusAssign,
42 MultiplyAssign,
43 DivideAssign,
44 Union, Intersection, LeftParen,
49 RightParen,
50 LeftBrace,
51 RightBrace,
52 LeftBracket,
53 RightBracket,
54 Semicolon,
55 Comma,
56 Dot,
57 Colon,
58 QuestionMark,
59
60 TypeAnnotation,
62 GenericStart,
63 GenericEnd,
64
65 Newline,
67 Whitespace,
68 Comment(String),
69 EOF,
70}
71
72#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum Keyword {
75 Let,
77 Const,
78 Var,
79 Function,
80 Class,
81 Interface,
82 Type,
83 Enum,
84 Namespace,
85 Module,
86 Import,
87 Export,
88 From,
89 As,
90 Default,
91
92 If,
94 Else,
95 Switch,
96 Case,
97 DefaultCase,
98 For,
99 While,
100 Do,
101 Break,
102 Continue,
103 Return,
104 Throw,
105 Try,
106 Catch,
107 Finally,
108
109 Extends,
111 Implements,
112 Super,
113 This,
114 New,
115 Static,
116 Public,
117 Private,
118 Protected,
119 Abstract,
120 Readonly,
121 Get,
122 Set,
123
124 Async,
126 Await,
127 Promise,
128
129 Any,
131 Unknown,
132 Never,
133 Void,
134 Null,
135 Undefined,
136 Boolean,
137 Number,
138 String,
139 Object,
140 Array,
141 Tuple,
142 Union,
143 Intersection,
144 Literal,
145 Mapped,
146 Conditional,
147 Template,
148
149 Partial,
151 Required,
152 Pick,
153 Omit,
154 Record,
155 Exclude,
156 Extract,
157 NonNullable,
158 Parameters,
159 ReturnType,
160 InstanceType,
161 ThisParameterType,
162 OmitThisParameter,
163 ThisType,
164
165 True,
167 False,
168 In,
169 Of,
170 Instanceof,
171 Typeof,
172 Keyof,
173 Key,
174 Is,
175 Asserts,
176 Infer,
177 Declare,
178 Ambient,
179 Global,
180}
181
/// Cursor-based lexer state: the owned source text plus the current read
/// position and the human-readable location used in error reports.
pub struct Lexer {
    /// The complete source text being lexed.
    input: String,
    /// Current read position within `input`; starts at 0.
    position: usize,
    /// Line of the current position, starting at 1.
    line: usize,
    /// Column of the current position, starting at 1.
    column: usize,
}
189
190impl Lexer {
191 pub fn new(input: String) -> Self {
193 Self {
194 input,
195 position: 0,
196 line: 1,
197 column: 1,
198 }
199 }
200
201 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
203 let mut tokens = Vec::new();
204
205 while self.position < self.input.len() {
206 match self.next_token()? {
207 Some(token) => {
208 tokens.push(token);
209 }
210 None => break,
211 }
212 }
213
214 tokens.push(Token::EOF);
215 Ok(tokens)
216 }
217
218 fn next_token(&mut self) -> Result<Option<Token>> {
220 self.skip_whitespace();
221
222 if self.position >= self.input.len() {
223 return Ok(None);
224 }
225
226 let ch = self.current_char();
227 let token = match ch {
228 '+' => {
229 if self.peek_char() == Some('=') {
230 self.advance();
231 Ok(Some(Token::PlusAssign))
232 } else if self.peek_char() == Some('+') {
233 self.advance();
234 Ok(Some(Token::Plus)) } else {
236 Ok(Some(Token::Plus))
237 }
238 }
239 '-' => {
240 if self.peek_char() == Some('=') {
241 self.advance();
242 Ok(Some(Token::MinusAssign))
243 } else if self.peek_char() == Some('>') {
244 self.advance();
245 Ok(Some(Token::Arrow))
246 } else {
247 Ok(Some(Token::Minus))
248 }
249 }
250 '*' => {
251 if self.peek_char() == Some('=') {
252 self.advance();
253 Ok(Some(Token::MultiplyAssign))
254 } else {
255 Ok(Some(Token::Multiply))
256 }
257 }
258 '/' => {
259 if self.peek_char() == Some('=') {
260 self.advance();
261 Ok(Some(Token::DivideAssign))
262 } else if self.peek_char() == Some('/') {
263 self.advance();
264 self.skip_line_comment();
265 Ok(None)
266 } else if self.peek_char() == Some('*') {
267 self.advance();
268 self.skip_block_comment();
269 Ok(None)
270 } else {
271 Ok(Some(Token::Divide))
272 }
273 }
274 '%' => Ok(Some(Token::Modulo)),
275 '=' => {
276 if self.peek_char() == Some('=') {
277 self.advance();
278 if self.peek_char() == Some('=') {
279 self.advance();
280 Ok(Some(Token::StrictEqual))
281 } else {
282 Ok(Some(Token::Equal))
283 }
284 } else if self.peek_char() == Some('>') {
285 self.advance();
286 Ok(Some(Token::Arrow))
287 } else {
288 Ok(Some(Token::Assign))
289 }
290 }
291 '!' => {
292 if self.peek_char() == Some('=') {
293 self.advance();
294 if self.peek_char() == Some('=') {
295 self.advance();
296 Ok(Some(Token::StrictNotEqual))
297 } else {
298 Ok(Some(Token::NotEqual))
299 }
300 } else {
301 Ok(Some(Token::Not))
302 }
303 }
304 '<' => {
305 if self.peek_char() == Some('=') {
306 self.advance();
307 Ok(Some(Token::LessEqual))
308 } else {
309 Ok(Some(Token::LessThan))
310 }
311 }
312 '>' => {
313 if self.peek_char() == Some('=') {
314 self.advance();
315 Ok(Some(Token::GreaterEqual))
316 } else {
317 Ok(Some(Token::GreaterThan))
318 }
319 }
320 '&' => {
321 if self.peek_char() == Some('&') {
322 self.advance();
323 Ok(Some(Token::And))
324 } else {
325 Ok(Some(Token::Intersection))
326 }
327 }
328 '|' => {
329 if self.peek_char() == Some('|') {
330 self.advance();
331 Ok(Some(Token::Or))
332 } else {
333 Ok(Some(Token::Union))
334 }
335 }
336 '(' => Ok(Some(Token::LeftParen)),
337 ')' => Ok(Some(Token::RightParen)),
338 '{' => Ok(Some(Token::LeftBrace)),
339 '}' => Ok(Some(Token::RightBrace)),
340 '[' => Ok(Some(Token::LeftBracket)),
341 ']' => Ok(Some(Token::RightBracket)),
342 ';' => Ok(Some(Token::Semicolon)),
343 ',' => Ok(Some(Token::Comma)),
344 '.' => Ok(Some(Token::Dot)),
345 ':' => Ok(Some(Token::Colon)),
346 '?' => Ok(Some(Token::QuestionMark)),
347 '"' | '\'' => Ok(self.parse_string()?),
348 '`' => Ok(self.parse_template_literal()?),
349 '0'..='9' => Ok(self.parse_number()?),
350 'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
351 _ => {
352 return Err(CompilerError::parse_error(
353 self.line,
354 self.column,
355 format!("Unexpected character: {}", ch),
356 ));
357 }
358 };
359
360 match ch {
362 'a'..='z' | 'A'..='Z' | '_' | '$' => {
363 }
365 '0'..='9' => {
366 }
368 '"' | '\'' => {
369 }
371 _ => {
372 self.advance();
374 }
375 }
376 token
377 }
378
379 fn current_char(&self) -> char {
381 self.input.chars().nth(self.position).unwrap_or('\0')
382 }
383
384 fn peek_char(&self) -> Option<char> {
386 self.input.chars().nth(self.position + 1)
387 }
388
389 fn advance(&mut self) {
391 if self.current_char() == '\n' {
392 self.line += 1;
393 self.column = 1;
394 } else {
395 self.column += 1;
396 }
397 self.position += 1;
398 }
399
400 fn skip_whitespace(&mut self) {
402 while self.position < self.input.len() {
403 let ch = self.current_char();
404 if ch.is_whitespace() {
405 self.advance();
406 } else if ch == '/' && self.peek_char() == Some('/') {
407 self.advance(); self.advance(); while self.position < self.input.len() && self.current_char() != '\n' {
411 self.advance();
412 }
413 } else if ch == '/' && self.peek_char() == Some('*') {
414 self.advance(); self.advance(); while self.position < self.input.len() {
418 if self.current_char() == '*' && self.peek_char() == Some('/') {
419 self.advance(); self.advance(); break;
422 }
423 self.advance();
424 }
425 } else {
426 break;
427 }
428 }
429 }
430
431 fn skip_line_comment(&mut self) -> Option<Token> {
433 while self.position < self.input.len() && self.current_char() != '\n' {
434 self.advance();
435 }
436 None
437 }
438
439 fn skip_block_comment(&mut self) -> Option<Token> {
441 while self.position < self.input.len() {
442 if self.current_char() == '*' && self.peek_char() == Some('/') {
443 self.advance();
444 self.advance();
445 break;
446 }
447 self.advance();
448 }
449 None
450 }
451
452 fn parse_string(&mut self) -> Result<Option<Token>> {
454 let quote = self.current_char();
455 let mut value = String::new();
456 self.advance();
457
458 while self.position < self.input.len() {
459 let ch = self.current_char();
460 if ch == quote {
461 self.advance();
462 return Ok(Some(Token::String(value)));
463 } else if ch == '\\' {
464 self.advance();
465 if self.position < self.input.len() {
466 let escaped = self.current_char();
467 value.push(match escaped {
468 'n' => '\n',
469 't' => '\t',
470 'r' => '\r',
471 '\\' => '\\',
472 '"' => '"',
473 '\'' => '\'',
474 _ => escaped,
475 });
476 self.advance();
477 }
478 } else {
479 value.push(ch);
480 self.advance();
481 }
482 }
483
484 Err(CompilerError::parse_error(
485 self.line,
486 self.column,
487 "Unterminated string literal",
488 ))
489 }
490
491 fn parse_template_literal(&mut self) -> Result<Option<Token>> {
493 let mut value = String::new();
494 self.advance(); while self.position < self.input.len() {
497 let ch = self.current_char();
498 if ch == '`' {
499 self.advance();
500 return Ok(Some(Token::TemplateLiteral(value)));
501 } else if ch == '\\' {
502 self.advance();
503 if self.position < self.input.len() {
504 let escaped = self.current_char();
505 value.push(match escaped {
506 'n' => '\n',
507 't' => '\t',
508 'r' => '\r',
509 '\\' => '\\',
510 '`' => '`',
511 '$' => '$',
512 _ => escaped,
513 });
514 self.advance();
515 }
516 } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
517 value.push('$');
519 self.advance();
520 if self.position < self.input.len() {
521 value.push('{');
522 self.advance();
523 while self.position < self.input.len() && self.current_char() != '}' {
525 value.push(self.current_char());
526 self.advance();
527 }
528 if self.position < self.input.len() {
529 value.push('}');
530 self.advance();
531 }
532 }
533 } else {
534 value.push(ch);
535 self.advance();
536 }
537 }
538
539 Err(CompilerError::parse_error(
540 self.line,
541 self.column,
542 "Unterminated template literal",
543 ))
544 }
545
546 fn parse_number(&mut self) -> Result<Option<Token>> {
548 let mut value = String::new();
549 let mut has_dot = false;
550
551 while self.position < self.input.len() {
552 let ch = self.current_char();
553 if ch.is_ascii_digit() {
554 value.push(ch);
555 self.advance();
556 } else if ch == '.' && !has_dot {
557 has_dot = true;
558 value.push(ch);
559 self.advance();
560 } else {
561 break;
562 }
563 }
564
565 let number: f64 = value.parse().map_err(|_| {
566 CompilerError::parse_error(self.line, self.column, "Invalid number literal")
567 })?;
568
569 Ok(Some(Token::Number(number)))
570 }
571
572 fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
574 let mut value = String::new();
575
576 while self.position < self.input.len() {
577 let ch = self.current_char();
578 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
579 value.push(ch);
580 self.advance();
581 } else {
582 break;
583 }
584 }
585
586 if value == "true" {
588 Ok(Some(Token::Boolean(true)))
589 } else if value == "false" {
590 Ok(Some(Token::Boolean(false)))
591 } else if let Some(keyword) = self.parse_keyword(&value) {
592 Ok(Some(Token::Keyword(keyword)))
593 } else {
594 Ok(Some(Token::Identifier(value)))
595 }
596 }
597
598 fn parse_keyword(&self, value: &str) -> Option<Keyword> {
600 match value {
601 "let" => Some(Keyword::Let),
602 "const" => Some(Keyword::Const),
603 "var" => Some(Keyword::Var),
604 "function" => Some(Keyword::Function),
605 "class" => Some(Keyword::Class),
606 "interface" => Some(Keyword::Interface),
607 "type" => Some(Keyword::Type),
608 "enum" => Some(Keyword::Enum),
609 "namespace" => Some(Keyword::Namespace),
610 "module" => Some(Keyword::Module),
611 "import" => Some(Keyword::Import),
612 "export" => Some(Keyword::Export),
613 "from" => Some(Keyword::From),
614 "as" => Some(Keyword::As),
615 "default" => Some(Keyword::Default),
616 "if" => Some(Keyword::If),
617 "else" => Some(Keyword::Else),
618 "switch" => Some(Keyword::Switch),
619 "case" => Some(Keyword::Case),
620 "for" => Some(Keyword::For),
621 "while" => Some(Keyword::While),
622 "do" => Some(Keyword::Do),
623 "break" => Some(Keyword::Break),
624 "continue" => Some(Keyword::Continue),
625 "return" => Some(Keyword::Return),
626 "throw" => Some(Keyword::Throw),
627 "try" => Some(Keyword::Try),
628 "catch" => Some(Keyword::Catch),
629 "finally" => Some(Keyword::Finally),
630 "extends" => Some(Keyword::Extends),
631 "implements" => Some(Keyword::Implements),
632 "super" => Some(Keyword::Super),
633 "this" => Some(Keyword::This),
634 "new" => Some(Keyword::New),
635 "static" => Some(Keyword::Static),
636 "public" => Some(Keyword::Public),
637 "private" => Some(Keyword::Private),
638 "protected" => Some(Keyword::Protected),
639 "abstract" => Some(Keyword::Abstract),
640 "readonly" => Some(Keyword::Readonly),
641 "get" => Some(Keyword::Get),
642 "set" => Some(Keyword::Set),
643 "async" => Some(Keyword::Async),
644 "await" => Some(Keyword::Await),
645 "Promise" => Some(Keyword::Promise),
646 "any" => Some(Keyword::Any),
647 "unknown" => Some(Keyword::Unknown),
648 "never" => Some(Keyword::Never),
649 "void" => Some(Keyword::Void),
650 "null" => Some(Keyword::Null),
651 "undefined" => Some(Keyword::Undefined),
652 "boolean" => Some(Keyword::Boolean),
653 "number" => Some(Keyword::Number),
654 "string" => Some(Keyword::String),
655 "object" => Some(Keyword::Object),
656 "Array" => Some(Keyword::Array),
657 "true" => Some(Keyword::True),
658 "false" => Some(Keyword::False),
659 "in" => Some(Keyword::In),
660 "of" => Some(Keyword::Of),
661 "instanceof" => Some(Keyword::Instanceof),
662 "typeof" => Some(Keyword::Typeof),
663 "keyof" => Some(Keyword::Keyof),
664 "key" => Some(Keyword::Key),
665 "is" => Some(Keyword::Is),
666 "asserts" => Some(Keyword::Asserts),
667 "infer" => Some(Keyword::Infer),
668 "declare" => Some(Keyword::Declare),
669 "global" => Some(Keyword::Global),
670 _ => None,
671 }
672 }
673}