1use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9 Number(f64),
11 String(String),
12 TemplateLiteral(String),
13 Boolean(bool),
14 Null,
15 Undefined,
16
17 Identifier(String),
19 Keyword(Keyword),
20
21 Plus,
23 Minus,
24 Multiply,
25 Divide,
26 Modulo,
27 Equal,
28 NotEqual,
29 StrictEqual,
30 StrictNotEqual,
31 LessThan,
32 GreaterThan,
33 LessEqual,
34 GreaterEqual,
35 And,
36 Or,
37 Not,
38 Assign,
39 Arrow, PlusAssign,
41 MinusAssign,
42 MultiplyAssign,
43 DivideAssign,
44 Union, Intersection, LeftParen,
49 RightParen,
50 LeftBrace,
51 RightBrace,
52 LeftBracket,
53 RightBracket,
54 Semicolon,
55 Comma,
56 Dot,
57 Colon,
58 QuestionMark,
59
60 TypeAnnotation,
62 GenericStart,
63 GenericEnd,
64
65 Newline,
67 Whitespace,
68 Comment(String),
69 EOF,
70}
71
72#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum Keyword {
75 Let,
77 Const,
78 Var,
79 Function,
80 Class,
81 Interface,
82 Type,
83 Enum,
84 Namespace,
85 Module,
86 Import,
87 Export,
88 From,
89 As,
90 Default,
91
92 If,
94 Else,
95 Switch,
96 Case,
97 DefaultCase,
98 For,
99 While,
100 Do,
101 Break,
102 Continue,
103 Return,
104 Throw,
105 Try,
106 Catch,
107 Finally,
108
109 Extends,
111 Implements,
112 Super,
113 This,
114 New,
115 Static,
116 Public,
117 Private,
118 Protected,
119 Abstract,
120 Readonly,
121 Get,
122 Set,
123
124 Async,
126 Await,
127 Promise,
128
129 Any,
131 Unknown,
132 Never,
133 Void,
134 Null,
135 Undefined,
136 Boolean,
137 Number,
138 String,
139 Object,
140 Array,
141 Tuple,
142 Union,
143 Intersection,
144 Literal,
145 Mapped,
146 Conditional,
147 Template,
148
149 Partial,
151 Required,
152 Pick,
153 Omit,
154 Record,
155 Exclude,
156 Extract,
157 NonNullable,
158 Parameters,
159 ReturnType,
160 InstanceType,
161 ThisParameterType,
162 OmitThisParameter,
163 ThisType,
164
165 True,
167 False,
168 In,
169 Of,
170 Instanceof,
171 Typeof,
172 Keyof,
173 Key,
174 Is,
175 Asserts,
176 Infer,
177 Declare,
178 Ambient,
179 Global,
180}
181
/// Scanner state for turning source text into tokens.
pub struct Lexer {
    /// Source text being scanned.
    input: String,
    /// Current read offset into `input`; starts at 0.
    position: usize,
    /// Line of the current position; starts at 1.
    line: usize,
    /// Column of the current position; starts at 1.
    column: usize,
}
189
190impl Lexer {
191 pub fn new(input: String) -> Self {
193 Self {
194 input,
195 position: 0,
196 line: 1,
197 column: 1,
198 }
199 }
200
201 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
203 let mut tokens = Vec::new();
204
205 while self.position < self.input.len() {
206 match self.next_token()? {
207 Some(token) => {
208 println!("Token: {:?}", token);
209 tokens.push(token);
210 }
211 None => break,
212 }
213 }
214
215 tokens.push(Token::EOF);
216 Ok(tokens)
217 }
218
219 fn next_token(&mut self) -> Result<Option<Token>> {
221 self.skip_whitespace();
222
223 if self.position >= self.input.len() {
224 return Ok(None);
225 }
226
227 let ch = self.current_char();
228 let token = match ch {
229 '+' => {
230 if self.peek_char() == Some('=') {
231 self.advance();
232 Ok(Some(Token::PlusAssign))
233 } else if self.peek_char() == Some('+') {
234 self.advance();
235 Ok(Some(Token::Plus)) } else {
237 Ok(Some(Token::Plus))
238 }
239 }
240 '-' => {
241 if self.peek_char() == Some('=') {
242 self.advance();
243 Ok(Some(Token::MinusAssign))
244 } else if self.peek_char() == Some('>') {
245 self.advance();
246 Ok(Some(Token::Arrow))
247 } else {
248 Ok(Some(Token::Minus))
249 }
250 }
251 '*' => {
252 if self.peek_char() == Some('=') {
253 self.advance();
254 Ok(Some(Token::MultiplyAssign))
255 } else {
256 Ok(Some(Token::Multiply))
257 }
258 }
259 '/' => {
260 if self.peek_char() == Some('=') {
261 self.advance();
262 Ok(Some(Token::DivideAssign))
263 } else if self.peek_char() == Some('/') {
264 self.advance();
265 self.skip_line_comment();
266 Ok(None)
267 } else if self.peek_char() == Some('*') {
268 self.advance();
269 self.skip_block_comment();
270 Ok(None)
271 } else {
272 Ok(Some(Token::Divide))
273 }
274 }
275 '%' => Ok(Some(Token::Modulo)),
276 '=' => {
277 if self.peek_char() == Some('=') {
278 self.advance();
279 if self.peek_char() == Some('=') {
280 self.advance();
281 Ok(Some(Token::StrictEqual))
282 } else {
283 Ok(Some(Token::Equal))
284 }
285 } else if self.peek_char() == Some('>') {
286 self.advance();
287 Ok(Some(Token::Arrow))
288 } else {
289 Ok(Some(Token::Assign))
290 }
291 }
292 '!' => {
293 if self.peek_char() == Some('=') {
294 self.advance();
295 if self.peek_char() == Some('=') {
296 self.advance();
297 Ok(Some(Token::StrictNotEqual))
298 } else {
299 Ok(Some(Token::NotEqual))
300 }
301 } else {
302 Ok(Some(Token::Not))
303 }
304 }
305 '<' => {
306 if self.peek_char() == Some('=') {
307 self.advance();
308 Ok(Some(Token::LessEqual))
309 } else {
310 Ok(Some(Token::LessThan))
311 }
312 }
313 '>' => {
314 if self.peek_char() == Some('=') {
315 self.advance();
316 Ok(Some(Token::GreaterEqual))
317 } else {
318 Ok(Some(Token::GreaterThan))
319 }
320 }
321 '&' => {
322 if self.peek_char() == Some('&') {
323 self.advance();
324 Ok(Some(Token::And))
325 } else {
326 Ok(Some(Token::Intersection))
327 }
328 }
329 '|' => {
330 if self.peek_char() == Some('|') {
331 self.advance();
332 Ok(Some(Token::Or))
333 } else {
334 Ok(Some(Token::Union))
335 }
336 }
337 '(' => Ok(Some(Token::LeftParen)),
338 ')' => Ok(Some(Token::RightParen)),
339 '{' => Ok(Some(Token::LeftBrace)),
340 '}' => Ok(Some(Token::RightBrace)),
341 '[' => Ok(Some(Token::LeftBracket)),
342 ']' => Ok(Some(Token::RightBracket)),
343 ';' => Ok(Some(Token::Semicolon)),
344 ',' => Ok(Some(Token::Comma)),
345 '.' => Ok(Some(Token::Dot)),
346 ':' => Ok(Some(Token::Colon)),
347 '?' => Ok(Some(Token::QuestionMark)),
348 '"' | '\'' => Ok(self.parse_string()?),
349 '`' => Ok(self.parse_template_literal()?),
350 '0'..='9' => Ok(self.parse_number()?),
351 'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
352 _ => {
353 return Err(CompilerError::parse_error(
354 self.line,
355 self.column,
356 format!("Unexpected character: {}", ch),
357 ));
358 }
359 };
360
361 match ch {
363 'a'..='z' | 'A'..='Z' | '_' | '$' => {
364 }
366 '0'..='9' => {
367 }
369 '"' | '\'' => {
370 }
372 _ => {
373 self.advance();
375 }
376 }
377 token
378 }
379
380 fn current_char(&self) -> char {
382 self.input.chars().nth(self.position).unwrap_or('\0')
383 }
384
385 fn peek_char(&self) -> Option<char> {
387 self.input.chars().nth(self.position + 1)
388 }
389
390 fn advance(&mut self) {
392 if self.current_char() == '\n' {
393 self.line += 1;
394 self.column = 1;
395 } else {
396 self.column += 1;
397 }
398 self.position += 1;
399 }
400
401 fn skip_whitespace(&mut self) {
403 while self.position < self.input.len() {
404 let ch = self.current_char();
405 if ch.is_whitespace() {
406 self.advance();
407 } else if ch == '/' && self.peek_char() == Some('/') {
408 self.advance(); self.advance(); while self.position < self.input.len() && self.current_char() != '\n' {
412 self.advance();
413 }
414 } else if ch == '/' && self.peek_char() == Some('*') {
415 self.advance(); self.advance(); while self.position < self.input.len() {
419 if self.current_char() == '*' && self.peek_char() == Some('/') {
420 self.advance(); self.advance(); break;
423 }
424 self.advance();
425 }
426 } else {
427 break;
428 }
429 }
430 }
431
432 fn skip_line_comment(&mut self) -> Option<Token> {
434 while self.position < self.input.len() && self.current_char() != '\n' {
435 self.advance();
436 }
437 None
438 }
439
440 fn skip_block_comment(&mut self) -> Option<Token> {
442 while self.position < self.input.len() {
443 if self.current_char() == '*' && self.peek_char() == Some('/') {
444 self.advance();
445 self.advance();
446 break;
447 }
448 self.advance();
449 }
450 None
451 }
452
453 fn parse_string(&mut self) -> Result<Option<Token>> {
455 let quote = self.current_char();
456 let mut value = String::new();
457 self.advance();
458
459 while self.position < self.input.len() {
460 let ch = self.current_char();
461 if ch == quote {
462 self.advance();
463 return Ok(Some(Token::String(value)));
464 } else if ch == '\\' {
465 self.advance();
466 if self.position < self.input.len() {
467 let escaped = self.current_char();
468 value.push(match escaped {
469 'n' => '\n',
470 't' => '\t',
471 'r' => '\r',
472 '\\' => '\\',
473 '"' => '"',
474 '\'' => '\'',
475 _ => escaped,
476 });
477 self.advance();
478 }
479 } else {
480 value.push(ch);
481 self.advance();
482 }
483 }
484
485 Err(CompilerError::parse_error(
486 self.line,
487 self.column,
488 "Unterminated string literal",
489 ))
490 }
491
492 fn parse_template_literal(&mut self) -> Result<Option<Token>> {
494 let mut value = String::new();
495 self.advance(); while self.position < self.input.len() {
498 let ch = self.current_char();
499 if ch == '`' {
500 self.advance();
501 return Ok(Some(Token::TemplateLiteral(value)));
502 } else if ch == '\\' {
503 self.advance();
504 if self.position < self.input.len() {
505 let escaped = self.current_char();
506 value.push(match escaped {
507 'n' => '\n',
508 't' => '\t',
509 'r' => '\r',
510 '\\' => '\\',
511 '`' => '`',
512 '$' => '$',
513 _ => escaped,
514 });
515 self.advance();
516 }
517 } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
518 value.push('$');
520 self.advance();
521 if self.position < self.input.len() {
522 value.push('{');
523 self.advance();
524 while self.position < self.input.len() && self.current_char() != '}' {
526 value.push(self.current_char());
527 self.advance();
528 }
529 if self.position < self.input.len() {
530 value.push('}');
531 self.advance();
532 }
533 }
534 } else {
535 value.push(ch);
536 self.advance();
537 }
538 }
539
540 Err(CompilerError::parse_error(
541 self.line,
542 self.column,
543 "Unterminated template literal",
544 ))
545 }
546
547 fn parse_number(&mut self) -> Result<Option<Token>> {
549 let mut value = String::new();
550 let mut has_dot = false;
551
552 while self.position < self.input.len() {
553 let ch = self.current_char();
554 if ch.is_ascii_digit() {
555 value.push(ch);
556 self.advance();
557 } else if ch == '.' && !has_dot {
558 has_dot = true;
559 value.push(ch);
560 self.advance();
561 } else {
562 break;
563 }
564 }
565
566 let number: f64 = value.parse().map_err(|_| {
567 CompilerError::parse_error(self.line, self.column, "Invalid number literal")
568 })?;
569
570 Ok(Some(Token::Number(number)))
571 }
572
573 fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
575 let mut value = String::new();
576
577 while self.position < self.input.len() {
578 let ch = self.current_char();
579 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
580 value.push(ch);
581 self.advance();
582 } else {
583 break;
584 }
585 }
586
587 if value == "true" {
589 Ok(Some(Token::Boolean(true)))
590 } else if value == "false" {
591 Ok(Some(Token::Boolean(false)))
592 } else if let Some(keyword) = self.parse_keyword(&value) {
593 Ok(Some(Token::Keyword(keyword)))
594 } else {
595 Ok(Some(Token::Identifier(value)))
596 }
597 }
598
599 fn parse_keyword(&self, value: &str) -> Option<Keyword> {
601 match value {
602 "let" => Some(Keyword::Let),
603 "const" => Some(Keyword::Const),
604 "var" => Some(Keyword::Var),
605 "function" => Some(Keyword::Function),
606 "class" => Some(Keyword::Class),
607 "interface" => Some(Keyword::Interface),
608 "type" => Some(Keyword::Type),
609 "enum" => Some(Keyword::Enum),
610 "namespace" => Some(Keyword::Namespace),
611 "module" => Some(Keyword::Module),
612 "import" => Some(Keyword::Import),
613 "export" => Some(Keyword::Export),
614 "from" => Some(Keyword::From),
615 "as" => Some(Keyword::As),
616 "default" => Some(Keyword::Default),
617 "if" => Some(Keyword::If),
618 "else" => Some(Keyword::Else),
619 "switch" => Some(Keyword::Switch),
620 "case" => Some(Keyword::Case),
621 "for" => Some(Keyword::For),
622 "while" => Some(Keyword::While),
623 "do" => Some(Keyword::Do),
624 "break" => Some(Keyword::Break),
625 "continue" => Some(Keyword::Continue),
626 "return" => Some(Keyword::Return),
627 "throw" => Some(Keyword::Throw),
628 "try" => Some(Keyword::Try),
629 "catch" => Some(Keyword::Catch),
630 "finally" => Some(Keyword::Finally),
631 "extends" => Some(Keyword::Extends),
632 "implements" => Some(Keyword::Implements),
633 "super" => Some(Keyword::Super),
634 "this" => Some(Keyword::This),
635 "new" => Some(Keyword::New),
636 "static" => Some(Keyword::Static),
637 "public" => Some(Keyword::Public),
638 "private" => Some(Keyword::Private),
639 "protected" => Some(Keyword::Protected),
640 "abstract" => Some(Keyword::Abstract),
641 "readonly" => Some(Keyword::Readonly),
642 "get" => Some(Keyword::Get),
643 "set" => Some(Keyword::Set),
644 "async" => Some(Keyword::Async),
645 "await" => Some(Keyword::Await),
646 "Promise" => Some(Keyword::Promise),
647 "any" => Some(Keyword::Any),
648 "unknown" => Some(Keyword::Unknown),
649 "never" => Some(Keyword::Never),
650 "void" => Some(Keyword::Void),
651 "null" => Some(Keyword::Null),
652 "undefined" => Some(Keyword::Undefined),
653 "boolean" => Some(Keyword::Boolean),
654 "number" => Some(Keyword::Number),
655 "string" => Some(Keyword::String),
656 "object" => Some(Keyword::Object),
657 "Array" => Some(Keyword::Array),
658 "true" => Some(Keyword::True),
659 "false" => Some(Keyword::False),
660 "in" => Some(Keyword::In),
661 "of" => Some(Keyword::Of),
662 "instanceof" => Some(Keyword::Instanceof),
663 "typeof" => Some(Keyword::Typeof),
664 "keyof" => Some(Keyword::Keyof),
665 "key" => Some(Keyword::Key),
666 "is" => Some(Keyword::Is),
667 "asserts" => Some(Keyword::Asserts),
668 "infer" => Some(Keyword::Infer),
669 "declare" => Some(Keyword::Declare),
670 "global" => Some(Keyword::Global),
671 _ => None,
672 }
673 }
674}