1use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9 Number(f64),
11 String(String),
12 TemplateLiteral(String),
13 Boolean(bool),
14 Null,
15 Undefined,
16
17 Identifier(String),
19 Keyword(Keyword),
20
21 Plus,
23 Minus,
24 Multiply,
25 Divide,
26 Modulo,
27 Equal,
28 NotEqual,
29 StrictEqual,
30 StrictNotEqual,
31 LessThan,
32 GreaterThan,
33 LessEqual,
34 GreaterEqual,
35 And,
36 Or,
37 Not,
38 Assign,
39 PlusAssign,
40 MinusAssign,
41 MultiplyAssign,
42 DivideAssign,
43 Union, Intersection, LeftParen,
48 RightParen,
49 LeftBrace,
50 RightBrace,
51 LeftBracket,
52 RightBracket,
53 Semicolon,
54 Comma,
55 Dot,
56 Colon,
57 QuestionMark,
58 Arrow,
59
60 TypeAnnotation,
62 GenericStart,
63 GenericEnd,
64
65 Newline,
67 Whitespace,
68 Comment(String),
69 EOF,
70}
71
72#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum Keyword {
75 Let,
77 Const,
78 Var,
79 Function,
80 Class,
81 Interface,
82 Type,
83 Enum,
84 Namespace,
85 Module,
86 Import,
87 Export,
88 From,
89 As,
90 Default,
91
92 If,
94 Else,
95 Switch,
96 Case,
97 DefaultCase,
98 For,
99 While,
100 Do,
101 Break,
102 Continue,
103 Return,
104 Throw,
105 Try,
106 Catch,
107 Finally,
108
109 Extends,
111 Implements,
112 Super,
113 This,
114 New,
115 Static,
116 Public,
117 Private,
118 Protected,
119 Abstract,
120 Readonly,
121
122 Async,
124 Await,
125 Promise,
126
127 Any,
129 Unknown,
130 Never,
131 Void,
132 Null,
133 Undefined,
134 Boolean,
135 Number,
136 String,
137 Object,
138 Array,
139 Tuple,
140 Union,
141 Intersection,
142 Literal,
143 Mapped,
144 Conditional,
145 Template,
146
147 Partial,
149 Required,
150 Pick,
151 Omit,
152 Record,
153 Exclude,
154 Extract,
155 NonNullable,
156 Parameters,
157 ReturnType,
158 InstanceType,
159 ThisParameterType,
160 OmitThisParameter,
161 ThisType,
162
163 True,
165 False,
166 In,
167 Of,
168 Instanceof,
169 Typeof,
170 Keyof,
171 Key,
172 Is,
173 Asserts,
174 Infer,
175 Declare,
176 Ambient,
177 Global,
178}
179
/// Hand-written scanner that turns source text into a stream of [`Token`]s.
pub struct Lexer {
    /// The complete source text being scanned.
    input: String,
    /// Cursor into `input` marking the next character to be lexed.
    position: usize,
    /// 1-based line of the cursor, used when reporting errors.
    line: usize,
    /// 1-based column of the cursor, used when reporting errors.
    column: usize,
}
187
188impl Lexer {
189 pub fn new(input: String) -> Self {
191 Self {
192 input,
193 position: 0,
194 line: 1,
195 column: 1,
196 }
197 }
198
199 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
201 let mut tokens = Vec::new();
202
203 while self.position < self.input.len() {
204 match self.next_token()? {
205 Some(token) => {
206 println!("Token: {:?}", token);
207 tokens.push(token);
208 }
209 None => break,
210 }
211 }
212
213 tokens.push(Token::EOF);
214 Ok(tokens)
215 }
216
217 fn next_token(&mut self) -> Result<Option<Token>> {
219 self.skip_whitespace();
220
221 if self.position >= self.input.len() {
222 return Ok(None);
223 }
224
225 let ch = self.current_char();
226 let token = match ch {
227 '+' => {
228 if self.peek_char() == Some('=') {
229 self.advance();
230 Ok(Some(Token::PlusAssign))
231 } else if self.peek_char() == Some('+') {
232 self.advance();
233 Ok(Some(Token::Plus)) } else {
235 Ok(Some(Token::Plus))
236 }
237 }
238 '-' => {
239 if self.peek_char() == Some('=') {
240 self.advance();
241 Ok(Some(Token::MinusAssign))
242 } else if self.peek_char() == Some('>') {
243 self.advance();
244 Ok(Some(Token::Arrow))
245 } else {
246 Ok(Some(Token::Minus))
247 }
248 }
249 '*' => {
250 if self.peek_char() == Some('=') {
251 self.advance();
252 Ok(Some(Token::MultiplyAssign))
253 } else {
254 Ok(Some(Token::Multiply))
255 }
256 }
257 '/' => {
258 if self.peek_char() == Some('=') {
259 self.advance();
260 Ok(Some(Token::DivideAssign))
261 } else if self.peek_char() == Some('/') {
262 self.advance();
263 self.skip_line_comment();
264 Ok(None)
265 } else if self.peek_char() == Some('*') {
266 self.advance();
267 self.skip_block_comment();
268 Ok(None)
269 } else {
270 Ok(Some(Token::Divide))
271 }
272 }
273 '%' => Ok(Some(Token::Modulo)),
274 '=' => {
275 if self.peek_char() == Some('=') {
276 self.advance();
277 if self.peek_char() == Some('=') {
278 self.advance();
279 Ok(Some(Token::StrictEqual))
280 } else {
281 Ok(Some(Token::Equal))
282 }
283 } else {
284 Ok(Some(Token::Assign))
285 }
286 }
287 '!' => {
288 if self.peek_char() == Some('=') {
289 self.advance();
290 if self.peek_char() == Some('=') {
291 self.advance();
292 Ok(Some(Token::StrictNotEqual))
293 } else {
294 Ok(Some(Token::NotEqual))
295 }
296 } else {
297 Ok(Some(Token::Not))
298 }
299 }
300 '<' => {
301 if self.peek_char() == Some('=') {
302 self.advance();
303 Ok(Some(Token::LessEqual))
304 } else {
305 Ok(Some(Token::LessThan))
306 }
307 }
308 '>' => {
309 if self.peek_char() == Some('=') {
310 self.advance();
311 Ok(Some(Token::GreaterEqual))
312 } else {
313 Ok(Some(Token::GreaterThan))
314 }
315 }
316 '&' => {
317 if self.peek_char() == Some('&') {
318 self.advance();
319 Ok(Some(Token::And))
320 } else {
321 Ok(Some(Token::Intersection))
322 }
323 }
324 '|' => {
325 if self.peek_char() == Some('|') {
326 self.advance();
327 Ok(Some(Token::Or))
328 } else {
329 Ok(Some(Token::Union))
330 }
331 }
332 '(' => Ok(Some(Token::LeftParen)),
333 ')' => Ok(Some(Token::RightParen)),
334 '{' => Ok(Some(Token::LeftBrace)),
335 '}' => Ok(Some(Token::RightBrace)),
336 '[' => Ok(Some(Token::LeftBracket)),
337 ']' => Ok(Some(Token::RightBracket)),
338 ';' => Ok(Some(Token::Semicolon)),
339 ',' => Ok(Some(Token::Comma)),
340 '.' => Ok(Some(Token::Dot)),
341 ':' => Ok(Some(Token::Colon)),
342 '?' => Ok(Some(Token::QuestionMark)),
343 '"' | '\'' => Ok(self.parse_string()?),
344 '`' => Ok(self.parse_template_literal()?),
345 '0'..='9' => Ok(self.parse_number()?),
346 'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
347 _ => {
348 return Err(CompilerError::parse_error(
349 self.line,
350 self.column,
351 format!("Unexpected character: {}", ch),
352 ));
353 }
354 };
355
356 match ch {
358 'a'..='z' | 'A'..='Z' | '_' | '$' => {
359 }
361 '0'..='9' => {
362 }
364 '"' | '\'' => {
365 }
367 _ => {
368 self.advance();
370 }
371 }
372 token
373 }
374
375 fn current_char(&self) -> char {
377 self.input.chars().nth(self.position).unwrap_or('\0')
378 }
379
380 fn peek_char(&self) -> Option<char> {
382 self.input.chars().nth(self.position + 1)
383 }
384
385 fn advance(&mut self) {
387 if self.current_char() == '\n' {
388 self.line += 1;
389 self.column = 1;
390 } else {
391 self.column += 1;
392 }
393 self.position += 1;
394 }
395
396 fn skip_whitespace(&mut self) {
398 while self.position < self.input.len() {
399 let ch = self.current_char();
400 if ch.is_whitespace() {
401 self.advance();
402 } else if ch == '/' && self.peek_char() == Some('/') {
403 self.advance(); self.advance(); while self.position < self.input.len() && self.current_char() != '\n' {
407 self.advance();
408 }
409 } else if ch == '/' && self.peek_char() == Some('*') {
410 self.advance(); self.advance(); while self.position < self.input.len() {
414 if self.current_char() == '*' && self.peek_char() == Some('/') {
415 self.advance(); self.advance(); break;
418 }
419 self.advance();
420 }
421 } else {
422 break;
423 }
424 }
425 }
426
427 fn skip_line_comment(&mut self) -> Option<Token> {
429 while self.position < self.input.len() && self.current_char() != '\n' {
430 self.advance();
431 }
432 None
433 }
434
435 fn skip_block_comment(&mut self) -> Option<Token> {
437 while self.position < self.input.len() {
438 if self.current_char() == '*' && self.peek_char() == Some('/') {
439 self.advance();
440 self.advance();
441 break;
442 }
443 self.advance();
444 }
445 None
446 }
447
448 fn parse_string(&mut self) -> Result<Option<Token>> {
450 let quote = self.current_char();
451 let mut value = String::new();
452 self.advance();
453
454 while self.position < self.input.len() {
455 let ch = self.current_char();
456 if ch == quote {
457 self.advance();
458 return Ok(Some(Token::String(value)));
459 } else if ch == '\\' {
460 self.advance();
461 if self.position < self.input.len() {
462 let escaped = self.current_char();
463 value.push(match escaped {
464 'n' => '\n',
465 't' => '\t',
466 'r' => '\r',
467 '\\' => '\\',
468 '"' => '"',
469 '\'' => '\'',
470 _ => escaped,
471 });
472 self.advance();
473 }
474 } else {
475 value.push(ch);
476 self.advance();
477 }
478 }
479
480 Err(CompilerError::parse_error(
481 self.line,
482 self.column,
483 "Unterminated string literal",
484 ))
485 }
486
487 fn parse_template_literal(&mut self) -> Result<Option<Token>> {
489 let mut value = String::new();
490 self.advance(); while self.position < self.input.len() {
493 let ch = self.current_char();
494 if ch == '`' {
495 self.advance();
496 return Ok(Some(Token::TemplateLiteral(value)));
497 } else if ch == '\\' {
498 self.advance();
499 if self.position < self.input.len() {
500 let escaped = self.current_char();
501 value.push(match escaped {
502 'n' => '\n',
503 't' => '\t',
504 'r' => '\r',
505 '\\' => '\\',
506 '`' => '`',
507 '$' => '$',
508 _ => escaped,
509 });
510 self.advance();
511 }
512 } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
513 value.push('$');
515 self.advance();
516 if self.position < self.input.len() {
517 value.push('{');
518 self.advance();
519 while self.position < self.input.len() && self.current_char() != '}' {
521 value.push(self.current_char());
522 self.advance();
523 }
524 if self.position < self.input.len() {
525 value.push('}');
526 self.advance();
527 }
528 }
529 } else {
530 value.push(ch);
531 self.advance();
532 }
533 }
534
535 Err(CompilerError::parse_error(
536 self.line,
537 self.column,
538 "Unterminated template literal",
539 ))
540 }
541
542 fn parse_number(&mut self) -> Result<Option<Token>> {
544 let mut value = String::new();
545 let mut has_dot = false;
546
547 while self.position < self.input.len() {
548 let ch = self.current_char();
549 if ch.is_ascii_digit() {
550 value.push(ch);
551 self.advance();
552 } else if ch == '.' && !has_dot {
553 has_dot = true;
554 value.push(ch);
555 self.advance();
556 } else {
557 break;
558 }
559 }
560
561 let number: f64 = value.parse().map_err(|_| {
562 CompilerError::parse_error(self.line, self.column, "Invalid number literal")
563 })?;
564
565 Ok(Some(Token::Number(number)))
566 }
567
568 fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
570 let mut value = String::new();
571
572 while self.position < self.input.len() {
573 let ch = self.current_char();
574 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
575 value.push(ch);
576 self.advance();
577 } else {
578 break;
579 }
580 }
581
582 if value == "true" {
584 Ok(Some(Token::Boolean(true)))
585 } else if value == "false" {
586 Ok(Some(Token::Boolean(false)))
587 } else if let Some(keyword) = self.parse_keyword(&value) {
588 Ok(Some(Token::Keyword(keyword)))
589 } else {
590 Ok(Some(Token::Identifier(value)))
591 }
592 }
593
594 fn parse_keyword(&self, value: &str) -> Option<Keyword> {
596 match value {
597 "let" => Some(Keyword::Let),
598 "const" => Some(Keyword::Const),
599 "var" => Some(Keyword::Var),
600 "function" => Some(Keyword::Function),
601 "class" => Some(Keyword::Class),
602 "interface" => Some(Keyword::Interface),
603 "type" => Some(Keyword::Type),
604 "enum" => Some(Keyword::Enum),
605 "namespace" => Some(Keyword::Namespace),
606 "module" => Some(Keyword::Module),
607 "import" => Some(Keyword::Import),
608 "export" => Some(Keyword::Export),
609 "from" => Some(Keyword::From),
610 "as" => Some(Keyword::As),
611 "default" => Some(Keyword::Default),
612 "if" => Some(Keyword::If),
613 "else" => Some(Keyword::Else),
614 "switch" => Some(Keyword::Switch),
615 "case" => Some(Keyword::Case),
616 "for" => Some(Keyword::For),
617 "while" => Some(Keyword::While),
618 "do" => Some(Keyword::Do),
619 "break" => Some(Keyword::Break),
620 "continue" => Some(Keyword::Continue),
621 "return" => Some(Keyword::Return),
622 "throw" => Some(Keyword::Throw),
623 "try" => Some(Keyword::Try),
624 "catch" => Some(Keyword::Catch),
625 "finally" => Some(Keyword::Finally),
626 "extends" => Some(Keyword::Extends),
627 "implements" => Some(Keyword::Implements),
628 "super" => Some(Keyword::Super),
629 "this" => Some(Keyword::This),
630 "new" => Some(Keyword::New),
631 "static" => Some(Keyword::Static),
632 "public" => Some(Keyword::Public),
633 "private" => Some(Keyword::Private),
634 "protected" => Some(Keyword::Protected),
635 "abstract" => Some(Keyword::Abstract),
636 "readonly" => Some(Keyword::Readonly),
637 "async" => Some(Keyword::Async),
638 "await" => Some(Keyword::Await),
639 "Promise" => Some(Keyword::Promise),
640 "any" => Some(Keyword::Any),
641 "unknown" => Some(Keyword::Unknown),
642 "never" => Some(Keyword::Never),
643 "void" => Some(Keyword::Void),
644 "null" => Some(Keyword::Null),
645 "undefined" => Some(Keyword::Undefined),
646 "boolean" => Some(Keyword::Boolean),
647 "number" => Some(Keyword::Number),
648 "string" => Some(Keyword::String),
649 "object" => Some(Keyword::Object),
650 "Array" => Some(Keyword::Array),
651 "true" => Some(Keyword::True),
652 "false" => Some(Keyword::False),
653 "in" => Some(Keyword::In),
654 "of" => Some(Keyword::Of),
655 "instanceof" => Some(Keyword::Instanceof),
656 "typeof" => Some(Keyword::Typeof),
657 "keyof" => Some(Keyword::Keyof),
658 "key" => Some(Keyword::Key),
659 "is" => Some(Keyword::Is),
660 "asserts" => Some(Keyword::Asserts),
661 "infer" => Some(Keyword::Infer),
662 "declare" => Some(Keyword::Declare),
663 "global" => Some(Keyword::Global),
664 _ => None,
665 }
666 }
667}