1use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9 Number(f64),
11 String(String),
12 TemplateLiteral(String),
13 Boolean(bool),
14 Null,
15 Undefined,
16
17 Identifier(String),
19 Keyword(Keyword),
20
21 Plus,
23 Minus,
24 Multiply,
25 Divide,
26 Modulo,
27 Equal,
28 NotEqual,
29 StrictEqual,
30 StrictNotEqual,
31 LessThan,
32 GreaterThan,
33 LessEqual,
34 GreaterEqual,
35 And,
36 Or,
37 Not,
38 Assign,
39 Arrow, PlusAssign,
41 MinusAssign,
42 MultiplyAssign,
43 DivideAssign,
44 Union, Intersection, LeftParen,
49 RightParen,
50 LeftBrace,
51 RightBrace,
52 LeftBracket,
53 RightBracket,
54 Semicolon,
55 Comma,
56 Dot,
57 Colon,
58 QuestionMark,
59
60 TypeAnnotation,
62 GenericStart,
63 GenericEnd,
64
65 Newline,
67 Whitespace,
68 Comment(String),
69 EOF,
70}
71
72#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum Keyword {
75 Let,
77 Const,
78 Var,
79 Function,
80 Class,
81 Interface,
82 Type,
83 Enum,
84 Namespace,
85 Module,
86 Import,
87 Export,
88 From,
89 As,
90 Default,
91
92 If,
94 Else,
95 Switch,
96 Case,
97 DefaultCase,
98 For,
99 While,
100 Do,
101 Break,
102 Continue,
103 Return,
104 Throw,
105 Try,
106 Catch,
107 Finally,
108
109 Extends,
111 Implements,
112 Super,
113 This,
114 New,
115 Static,
116 Public,
117 Private,
118 Protected,
119 Abstract,
120 Readonly,
121
122 Async,
124 Await,
125 Promise,
126
127 Any,
129 Unknown,
130 Never,
131 Void,
132 Null,
133 Undefined,
134 Boolean,
135 Number,
136 String,
137 Object,
138 Array,
139 Tuple,
140 Union,
141 Intersection,
142 Literal,
143 Mapped,
144 Conditional,
145 Template,
146
147 Partial,
149 Required,
150 Pick,
151 Omit,
152 Record,
153 Exclude,
154 Extract,
155 NonNullable,
156 Parameters,
157 ReturnType,
158 InstanceType,
159 ThisParameterType,
160 OmitThisParameter,
161 ThisType,
162
163 True,
165 False,
166 In,
167 Of,
168 Instanceof,
169 Typeof,
170 Keyof,
171 Key,
172 Is,
173 Asserts,
174 Infer,
175 Declare,
176 Ambient,
177 Global,
178}
179
/// Scanner state: the source text, a cursor into it, and the line/column of
/// that cursor used when reporting errors.
pub struct Lexer {
    /// Source text being scanned.
    input: String,
    /// Cursor into `input` marking the next character to examine.
    position: usize,
    /// Line of the cursor; `Lexer::new` starts it at 1.
    line: usize,
    /// Column of the cursor; `Lexer::new` starts it at 1.
    column: usize,
}
187
188impl Lexer {
189 pub fn new(input: String) -> Self {
191 Self {
192 input,
193 position: 0,
194 line: 1,
195 column: 1,
196 }
197 }
198
199 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
201 let mut tokens = Vec::new();
202
203 while self.position < self.input.len() {
204 match self.next_token()? {
205 Some(token) => {
206 println!("Token: {:?}", token);
207 tokens.push(token);
208 }
209 None => break,
210 }
211 }
212
213 tokens.push(Token::EOF);
214 Ok(tokens)
215 }
216
217 fn next_token(&mut self) -> Result<Option<Token>> {
219 self.skip_whitespace();
220
221 if self.position >= self.input.len() {
222 return Ok(None);
223 }
224
225 let ch = self.current_char();
226 let token = match ch {
227 '+' => {
228 if self.peek_char() == Some('=') {
229 self.advance();
230 Ok(Some(Token::PlusAssign))
231 } else if self.peek_char() == Some('+') {
232 self.advance();
233 Ok(Some(Token::Plus)) } else {
235 Ok(Some(Token::Plus))
236 }
237 }
238 '-' => {
239 if self.peek_char() == Some('=') {
240 self.advance();
241 Ok(Some(Token::MinusAssign))
242 } else if self.peek_char() == Some('>') {
243 self.advance();
244 Ok(Some(Token::Arrow))
245 } else {
246 Ok(Some(Token::Minus))
247 }
248 }
249 '*' => {
250 if self.peek_char() == Some('=') {
251 self.advance();
252 Ok(Some(Token::MultiplyAssign))
253 } else {
254 Ok(Some(Token::Multiply))
255 }
256 }
257 '/' => {
258 if self.peek_char() == Some('=') {
259 self.advance();
260 Ok(Some(Token::DivideAssign))
261 } else if self.peek_char() == Some('/') {
262 self.advance();
263 self.skip_line_comment();
264 Ok(None)
265 } else if self.peek_char() == Some('*') {
266 self.advance();
267 self.skip_block_comment();
268 Ok(None)
269 } else {
270 Ok(Some(Token::Divide))
271 }
272 }
273 '%' => Ok(Some(Token::Modulo)),
274 '=' => {
275 if self.peek_char() == Some('=') {
276 self.advance();
277 if self.peek_char() == Some('=') {
278 self.advance();
279 Ok(Some(Token::StrictEqual))
280 } else {
281 Ok(Some(Token::Equal))
282 }
283 } else if self.peek_char() == Some('>') {
284 self.advance();
285 Ok(Some(Token::Arrow))
286 } else {
287 Ok(Some(Token::Assign))
288 }
289 }
290 '!' => {
291 if self.peek_char() == Some('=') {
292 self.advance();
293 if self.peek_char() == Some('=') {
294 self.advance();
295 Ok(Some(Token::StrictNotEqual))
296 } else {
297 Ok(Some(Token::NotEqual))
298 }
299 } else {
300 Ok(Some(Token::Not))
301 }
302 }
303 '<' => {
304 if self.peek_char() == Some('=') {
305 self.advance();
306 Ok(Some(Token::LessEqual))
307 } else {
308 Ok(Some(Token::LessThan))
309 }
310 }
311 '>' => {
312 if self.peek_char() == Some('=') {
313 self.advance();
314 Ok(Some(Token::GreaterEqual))
315 } else {
316 Ok(Some(Token::GreaterThan))
317 }
318 }
319 '&' => {
320 if self.peek_char() == Some('&') {
321 self.advance();
322 Ok(Some(Token::And))
323 } else {
324 Ok(Some(Token::Intersection))
325 }
326 }
327 '|' => {
328 if self.peek_char() == Some('|') {
329 self.advance();
330 Ok(Some(Token::Or))
331 } else {
332 Ok(Some(Token::Union))
333 }
334 }
335 '(' => Ok(Some(Token::LeftParen)),
336 ')' => Ok(Some(Token::RightParen)),
337 '{' => Ok(Some(Token::LeftBrace)),
338 '}' => Ok(Some(Token::RightBrace)),
339 '[' => Ok(Some(Token::LeftBracket)),
340 ']' => Ok(Some(Token::RightBracket)),
341 ';' => Ok(Some(Token::Semicolon)),
342 ',' => Ok(Some(Token::Comma)),
343 '.' => Ok(Some(Token::Dot)),
344 ':' => Ok(Some(Token::Colon)),
345 '?' => Ok(Some(Token::QuestionMark)),
346 '"' | '\'' => Ok(self.parse_string()?),
347 '`' => Ok(self.parse_template_literal()?),
348 '0'..='9' => Ok(self.parse_number()?),
349 'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
350 _ => {
351 return Err(CompilerError::parse_error(
352 self.line,
353 self.column,
354 format!("Unexpected character: {}", ch),
355 ));
356 }
357 };
358
359 match ch {
361 'a'..='z' | 'A'..='Z' | '_' | '$' => {
362 }
364 '0'..='9' => {
365 }
367 '"' | '\'' => {
368 }
370 _ => {
371 self.advance();
373 }
374 }
375 token
376 }
377
378 fn current_char(&self) -> char {
380 self.input.chars().nth(self.position).unwrap_or('\0')
381 }
382
383 fn peek_char(&self) -> Option<char> {
385 self.input.chars().nth(self.position + 1)
386 }
387
388 fn advance(&mut self) {
390 if self.current_char() == '\n' {
391 self.line += 1;
392 self.column = 1;
393 } else {
394 self.column += 1;
395 }
396 self.position += 1;
397 }
398
399 fn skip_whitespace(&mut self) {
401 while self.position < self.input.len() {
402 let ch = self.current_char();
403 if ch.is_whitespace() {
404 self.advance();
405 } else if ch == '/' && self.peek_char() == Some('/') {
406 self.advance(); self.advance(); while self.position < self.input.len() && self.current_char() != '\n' {
410 self.advance();
411 }
412 } else if ch == '/' && self.peek_char() == Some('*') {
413 self.advance(); self.advance(); while self.position < self.input.len() {
417 if self.current_char() == '*' && self.peek_char() == Some('/') {
418 self.advance(); self.advance(); break;
421 }
422 self.advance();
423 }
424 } else {
425 break;
426 }
427 }
428 }
429
430 fn skip_line_comment(&mut self) -> Option<Token> {
432 while self.position < self.input.len() && self.current_char() != '\n' {
433 self.advance();
434 }
435 None
436 }
437
438 fn skip_block_comment(&mut self) -> Option<Token> {
440 while self.position < self.input.len() {
441 if self.current_char() == '*' && self.peek_char() == Some('/') {
442 self.advance();
443 self.advance();
444 break;
445 }
446 self.advance();
447 }
448 None
449 }
450
451 fn parse_string(&mut self) -> Result<Option<Token>> {
453 let quote = self.current_char();
454 let mut value = String::new();
455 self.advance();
456
457 while self.position < self.input.len() {
458 let ch = self.current_char();
459 if ch == quote {
460 self.advance();
461 return Ok(Some(Token::String(value)));
462 } else if ch == '\\' {
463 self.advance();
464 if self.position < self.input.len() {
465 let escaped = self.current_char();
466 value.push(match escaped {
467 'n' => '\n',
468 't' => '\t',
469 'r' => '\r',
470 '\\' => '\\',
471 '"' => '"',
472 '\'' => '\'',
473 _ => escaped,
474 });
475 self.advance();
476 }
477 } else {
478 value.push(ch);
479 self.advance();
480 }
481 }
482
483 Err(CompilerError::parse_error(
484 self.line,
485 self.column,
486 "Unterminated string literal",
487 ))
488 }
489
490 fn parse_template_literal(&mut self) -> Result<Option<Token>> {
492 let mut value = String::new();
493 self.advance(); while self.position < self.input.len() {
496 let ch = self.current_char();
497 if ch == '`' {
498 self.advance();
499 return Ok(Some(Token::TemplateLiteral(value)));
500 } else if ch == '\\' {
501 self.advance();
502 if self.position < self.input.len() {
503 let escaped = self.current_char();
504 value.push(match escaped {
505 'n' => '\n',
506 't' => '\t',
507 'r' => '\r',
508 '\\' => '\\',
509 '`' => '`',
510 '$' => '$',
511 _ => escaped,
512 });
513 self.advance();
514 }
515 } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
516 value.push('$');
518 self.advance();
519 if self.position < self.input.len() {
520 value.push('{');
521 self.advance();
522 while self.position < self.input.len() && self.current_char() != '}' {
524 value.push(self.current_char());
525 self.advance();
526 }
527 if self.position < self.input.len() {
528 value.push('}');
529 self.advance();
530 }
531 }
532 } else {
533 value.push(ch);
534 self.advance();
535 }
536 }
537
538 Err(CompilerError::parse_error(
539 self.line,
540 self.column,
541 "Unterminated template literal",
542 ))
543 }
544
545 fn parse_number(&mut self) -> Result<Option<Token>> {
547 let mut value = String::new();
548 let mut has_dot = false;
549
550 while self.position < self.input.len() {
551 let ch = self.current_char();
552 if ch.is_ascii_digit() {
553 value.push(ch);
554 self.advance();
555 } else if ch == '.' && !has_dot {
556 has_dot = true;
557 value.push(ch);
558 self.advance();
559 } else {
560 break;
561 }
562 }
563
564 let number: f64 = value.parse().map_err(|_| {
565 CompilerError::parse_error(self.line, self.column, "Invalid number literal")
566 })?;
567
568 Ok(Some(Token::Number(number)))
569 }
570
571 fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
573 let mut value = String::new();
574
575 while self.position < self.input.len() {
576 let ch = self.current_char();
577 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
578 value.push(ch);
579 self.advance();
580 } else {
581 break;
582 }
583 }
584
585 if value == "true" {
587 Ok(Some(Token::Boolean(true)))
588 } else if value == "false" {
589 Ok(Some(Token::Boolean(false)))
590 } else if let Some(keyword) = self.parse_keyword(&value) {
591 Ok(Some(Token::Keyword(keyword)))
592 } else {
593 Ok(Some(Token::Identifier(value)))
594 }
595 }
596
597 fn parse_keyword(&self, value: &str) -> Option<Keyword> {
599 match value {
600 "let" => Some(Keyword::Let),
601 "const" => Some(Keyword::Const),
602 "var" => Some(Keyword::Var),
603 "function" => Some(Keyword::Function),
604 "class" => Some(Keyword::Class),
605 "interface" => Some(Keyword::Interface),
606 "type" => Some(Keyword::Type),
607 "enum" => Some(Keyword::Enum),
608 "namespace" => Some(Keyword::Namespace),
609 "module" => Some(Keyword::Module),
610 "import" => Some(Keyword::Import),
611 "export" => Some(Keyword::Export),
612 "from" => Some(Keyword::From),
613 "as" => Some(Keyword::As),
614 "default" => Some(Keyword::Default),
615 "if" => Some(Keyword::If),
616 "else" => Some(Keyword::Else),
617 "switch" => Some(Keyword::Switch),
618 "case" => Some(Keyword::Case),
619 "for" => Some(Keyword::For),
620 "while" => Some(Keyword::While),
621 "do" => Some(Keyword::Do),
622 "break" => Some(Keyword::Break),
623 "continue" => Some(Keyword::Continue),
624 "return" => Some(Keyword::Return),
625 "throw" => Some(Keyword::Throw),
626 "try" => Some(Keyword::Try),
627 "catch" => Some(Keyword::Catch),
628 "finally" => Some(Keyword::Finally),
629 "extends" => Some(Keyword::Extends),
630 "implements" => Some(Keyword::Implements),
631 "super" => Some(Keyword::Super),
632 "this" => Some(Keyword::This),
633 "new" => Some(Keyword::New),
634 "static" => Some(Keyword::Static),
635 "public" => Some(Keyword::Public),
636 "private" => Some(Keyword::Private),
637 "protected" => Some(Keyword::Protected),
638 "abstract" => Some(Keyword::Abstract),
639 "readonly" => Some(Keyword::Readonly),
640 "async" => Some(Keyword::Async),
641 "await" => Some(Keyword::Await),
642 "Promise" => Some(Keyword::Promise),
643 "any" => Some(Keyword::Any),
644 "unknown" => Some(Keyword::Unknown),
645 "never" => Some(Keyword::Never),
646 "void" => Some(Keyword::Void),
647 "null" => Some(Keyword::Null),
648 "undefined" => Some(Keyword::Undefined),
649 "boolean" => Some(Keyword::Boolean),
650 "number" => Some(Keyword::Number),
651 "string" => Some(Keyword::String),
652 "object" => Some(Keyword::Object),
653 "Array" => Some(Keyword::Array),
654 "true" => Some(Keyword::True),
655 "false" => Some(Keyword::False),
656 "in" => Some(Keyword::In),
657 "of" => Some(Keyword::Of),
658 "instanceof" => Some(Keyword::Instanceof),
659 "typeof" => Some(Keyword::Typeof),
660 "keyof" => Some(Keyword::Keyof),
661 "key" => Some(Keyword::Key),
662 "is" => Some(Keyword::Is),
663 "asserts" => Some(Keyword::Asserts),
664 "infer" => Some(Keyword::Infer),
665 "declare" => Some(Keyword::Declare),
666 "global" => Some(Keyword::Global),
667 _ => None,
668 }
669 }
670}