1use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9 Number(f64),
11 String(String),
12 TemplateLiteral(String),
13 Boolean(bool),
14 Null,
15 Undefined,
16
17 Identifier(String),
19 Keyword(Keyword),
20
21 Plus,
23 Minus,
24 Multiply,
25 Divide,
26 Modulo,
27 Equal,
28 NotEqual,
29 StrictEqual,
30 StrictNotEqual,
31 LessThan,
32 GreaterThan,
33 LessEqual,
34 GreaterEqual,
35 And,
36 Or,
37 Not,
38 Assign,
39 PlusAssign,
40 MinusAssign,
41 MultiplyAssign,
42 DivideAssign,
43
44 LeftParen,
46 RightParen,
47 LeftBrace,
48 RightBrace,
49 LeftBracket,
50 RightBracket,
51 Semicolon,
52 Comma,
53 Dot,
54 Colon,
55 QuestionMark,
56 Arrow,
57
58 TypeAnnotation,
60 GenericStart,
61 GenericEnd,
62
63 Newline,
65 Whitespace,
66 Comment(String),
67 EOF,
68}
69
70#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub enum Keyword {
73 Let,
75 Const,
76 Var,
77 Function,
78 Class,
79 Interface,
80 Type,
81 Enum,
82 Namespace,
83 Module,
84 Import,
85 Export,
86 From,
87 As,
88 Default,
89
90 If,
92 Else,
93 Switch,
94 Case,
95 DefaultCase,
96 For,
97 While,
98 Do,
99 Break,
100 Continue,
101 Return,
102 Throw,
103 Try,
104 Catch,
105 Finally,
106
107 Extends,
109 Implements,
110 Super,
111 This,
112 New,
113 Static,
114 Public,
115 Private,
116 Protected,
117 Abstract,
118 Readonly,
119
120 Async,
122 Await,
123 Promise,
124
125 Any,
127 Unknown,
128 Never,
129 Void,
130 Null,
131 Undefined,
132 Boolean,
133 Number,
134 String,
135 Object,
136 Array,
137 Tuple,
138 Union,
139 Intersection,
140 Literal,
141 Mapped,
142 Conditional,
143 Template,
144
145 Partial,
147 Required,
148 Pick,
149 Omit,
150 Record,
151 Exclude,
152 Extract,
153 NonNullable,
154 Parameters,
155 ReturnType,
156 InstanceType,
157 ThisParameterType,
158 OmitThisParameter,
159 ThisType,
160
161 True,
163 False,
164 In,
165 Of,
166 Instanceof,
167 Typeof,
168 Keyof,
169 Is,
170 Asserts,
171 Infer,
172 Declare,
173 Ambient,
174 Global,
175}
176
/// Hand-written lexer that walks a source string and produces tokens.
///
/// Derives `Debug` so lexer state can be inspected in diagnostics and test
/// failures, and `Clone` so a lexer can be snapshotted.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// Complete source text being tokenized.
    input: String,
    /// Cursor into `input` marking the next unread character.
    position: usize,
    /// Line of the cursor, used for error reporting.
    line: usize,
    /// Column of the cursor within the current line, used for error reporting.
    column: usize,
}
184
185impl Lexer {
186 pub fn new(input: String) -> Self {
188 Self {
189 input,
190 position: 0,
191 line: 1,
192 column: 1,
193 }
194 }
195
196 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
198 let mut tokens = Vec::new();
199
200 while self.position < self.input.len() {
201 match self.next_token()? {
202 Some(token) => {
203 println!("Token: {:?}", token);
204 tokens.push(token);
205 }
206 None => break,
207 }
208 }
209
210 tokens.push(Token::EOF);
211 Ok(tokens)
212 }
213
214 fn next_token(&mut self) -> Result<Option<Token>> {
216 self.skip_whitespace();
217
218 if self.position >= self.input.len() {
219 return Ok(None);
220 }
221
222 let ch = self.current_char();
223 let token = match ch {
224 '+' => {
225 if self.peek_char() == Some('=') {
226 self.advance();
227 Ok(Some(Token::PlusAssign))
228 } else if self.peek_char() == Some('+') {
229 self.advance();
230 Ok(Some(Token::Plus)) } else {
232 Ok(Some(Token::Plus))
233 }
234 }
235 '-' => {
236 if self.peek_char() == Some('=') {
237 self.advance();
238 Ok(Some(Token::MinusAssign))
239 } else if self.peek_char() == Some('>') {
240 self.advance();
241 Ok(Some(Token::Arrow))
242 } else {
243 Ok(Some(Token::Minus))
244 }
245 }
246 '*' => {
247 if self.peek_char() == Some('=') {
248 self.advance();
249 Ok(Some(Token::MultiplyAssign))
250 } else {
251 Ok(Some(Token::Multiply))
252 }
253 }
254 '/' => {
255 if self.peek_char() == Some('=') {
256 self.advance();
257 Ok(Some(Token::DivideAssign))
258 } else if self.peek_char() == Some('/') {
259 self.advance();
260 self.skip_line_comment();
261 Ok(None)
262 } else if self.peek_char() == Some('*') {
263 self.advance();
264 self.skip_block_comment();
265 Ok(None)
266 } else {
267 Ok(Some(Token::Divide))
268 }
269 }
270 '%' => Ok(Some(Token::Modulo)),
271 '=' => {
272 if self.peek_char() == Some('=') {
273 self.advance();
274 if self.peek_char() == Some('=') {
275 self.advance();
276 Ok(Some(Token::StrictEqual))
277 } else {
278 Ok(Some(Token::Equal))
279 }
280 } else {
281 Ok(Some(Token::Assign))
282 }
283 }
284 '!' => {
285 if self.peek_char() == Some('=') {
286 self.advance();
287 if self.peek_char() == Some('=') {
288 self.advance();
289 Ok(Some(Token::StrictNotEqual))
290 } else {
291 Ok(Some(Token::NotEqual))
292 }
293 } else {
294 Ok(Some(Token::Not))
295 }
296 }
297 '<' => {
298 if self.peek_char() == Some('=') {
299 self.advance();
300 Ok(Some(Token::LessEqual))
301 } else {
302 Ok(Some(Token::LessThan))
303 }
304 }
305 '>' => {
306 if self.peek_char() == Some('=') {
307 self.advance();
308 Ok(Some(Token::GreaterEqual))
309 } else {
310 Ok(Some(Token::GreaterThan))
311 }
312 }
313 '&' => {
314 if self.peek_char() == Some('&') {
315 self.advance();
316 Ok(Some(Token::And))
317 } else {
318 return Err(CompilerError::parse_error(
319 self.line,
320 self.column,
321 "Unexpected character: &",
322 ));
323 }
324 }
325 '|' => {
326 if self.peek_char() == Some('|') {
327 self.advance();
328 Ok(Some(Token::Or))
329 } else {
330 return Err(CompilerError::parse_error(
331 self.line,
332 self.column,
333 "Unexpected character: |",
334 ));
335 }
336 }
337 '(' => Ok(Some(Token::LeftParen)),
338 ')' => Ok(Some(Token::RightParen)),
339 '{' => Ok(Some(Token::LeftBrace)),
340 '}' => Ok(Some(Token::RightBrace)),
341 '[' => Ok(Some(Token::LeftBracket)),
342 ']' => Ok(Some(Token::RightBracket)),
343 ';' => Ok(Some(Token::Semicolon)),
344 ',' => Ok(Some(Token::Comma)),
345 '.' => Ok(Some(Token::Dot)),
346 ':' => Ok(Some(Token::Colon)),
347 '?' => Ok(Some(Token::QuestionMark)),
348 '"' | '\'' => Ok(self.parse_string()?),
349 '`' => Ok(self.parse_template_literal()?),
350 '0'..='9' => Ok(self.parse_number()?),
351 'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
352 _ => {
353 return Err(CompilerError::parse_error(
354 self.line,
355 self.column,
356 format!("Unexpected character: {}", ch),
357 ));
358 }
359 };
360
361 match ch {
363 'a'..='z' | 'A'..='Z' | '_' | '$' => {
364 }
366 '0'..='9' => {
367 }
369 '"' | '\'' => {
370 }
372 _ => {
373 self.advance();
375 }
376 }
377 token
378 }
379
380 fn current_char(&self) -> char {
382 self.input.chars().nth(self.position).unwrap_or('\0')
383 }
384
385 fn peek_char(&self) -> Option<char> {
387 self.input.chars().nth(self.position + 1)
388 }
389
390 fn advance(&mut self) {
392 if self.current_char() == '\n' {
393 self.line += 1;
394 self.column = 1;
395 } else {
396 self.column += 1;
397 }
398 self.position += 1;
399 }
400
401 fn skip_whitespace(&mut self) {
403 while self.position < self.input.len() {
404 let ch = self.current_char();
405 if ch.is_whitespace() {
406 self.advance();
407 } else if ch == '/' && self.peek_char() == Some('/') {
408 self.advance(); self.advance(); while self.position < self.input.len() && self.current_char() != '\n' {
412 self.advance();
413 }
414 } else if ch == '/' && self.peek_char() == Some('*') {
415 self.advance(); self.advance(); while self.position < self.input.len() {
419 if self.current_char() == '*' && self.peek_char() == Some('/') {
420 self.advance(); self.advance(); break;
423 }
424 self.advance();
425 }
426 } else {
427 break;
428 }
429 }
430 }
431
432 fn skip_line_comment(&mut self) -> Option<Token> {
434 while self.position < self.input.len() && self.current_char() != '\n' {
435 self.advance();
436 }
437 None
438 }
439
440 fn skip_block_comment(&mut self) -> Option<Token> {
442 while self.position < self.input.len() {
443 if self.current_char() == '*' && self.peek_char() == Some('/') {
444 self.advance();
445 self.advance();
446 break;
447 }
448 self.advance();
449 }
450 None
451 }
452
453 fn parse_string(&mut self) -> Result<Option<Token>> {
455 let quote = self.current_char();
456 let mut value = String::new();
457 self.advance();
458
459 while self.position < self.input.len() {
460 let ch = self.current_char();
461 if ch == quote {
462 self.advance();
463 return Ok(Some(Token::String(value)));
464 } else if ch == '\\' {
465 self.advance();
466 if self.position < self.input.len() {
467 let escaped = self.current_char();
468 value.push(match escaped {
469 'n' => '\n',
470 't' => '\t',
471 'r' => '\r',
472 '\\' => '\\',
473 '"' => '"',
474 '\'' => '\'',
475 _ => escaped,
476 });
477 self.advance();
478 }
479 } else {
480 value.push(ch);
481 self.advance();
482 }
483 }
484
485 Err(CompilerError::parse_error(
486 self.line,
487 self.column,
488 "Unterminated string literal",
489 ))
490 }
491
492 fn parse_template_literal(&mut self) -> Result<Option<Token>> {
494 let mut value = String::new();
495 self.advance(); while self.position < self.input.len() {
498 let ch = self.current_char();
499 if ch == '`' {
500 self.advance();
501 return Ok(Some(Token::TemplateLiteral(value)));
502 } else if ch == '\\' {
503 self.advance();
504 if self.position < self.input.len() {
505 let escaped = self.current_char();
506 value.push(match escaped {
507 'n' => '\n',
508 't' => '\t',
509 'r' => '\r',
510 '\\' => '\\',
511 '`' => '`',
512 '$' => '$',
513 _ => escaped,
514 });
515 self.advance();
516 }
517 } else {
518 value.push(ch);
519 self.advance();
520 }
521 }
522
523 Err(CompilerError::parse_error(
524 self.line,
525 self.column,
526 "Unterminated template literal",
527 ))
528 }
529
530 fn parse_number(&mut self) -> Result<Option<Token>> {
532 let mut value = String::new();
533 let mut has_dot = false;
534
535 while self.position < self.input.len() {
536 let ch = self.current_char();
537 if ch.is_ascii_digit() {
538 value.push(ch);
539 self.advance();
540 } else if ch == '.' && !has_dot {
541 has_dot = true;
542 value.push(ch);
543 self.advance();
544 } else {
545 break;
546 }
547 }
548
549 let number: f64 = value.parse().map_err(|_| {
550 CompilerError::parse_error(self.line, self.column, "Invalid number literal")
551 })?;
552
553 Ok(Some(Token::Number(number)))
554 }
555
556 fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
558 let mut value = String::new();
559
560 while self.position < self.input.len() {
561 let ch = self.current_char();
562 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
563 value.push(ch);
564 self.advance();
565 } else {
566 break;
567 }
568 }
569
570 if let Some(keyword) = self.parse_keyword(&value) {
572 Ok(Some(Token::Keyword(keyword)))
573 } else {
574 Ok(Some(Token::Identifier(value)))
575 }
576 }
577
578 fn parse_keyword(&self, value: &str) -> Option<Keyword> {
580 match value {
581 "let" => Some(Keyword::Let),
582 "const" => Some(Keyword::Const),
583 "var" => Some(Keyword::Var),
584 "function" => Some(Keyword::Function),
585 "class" => Some(Keyword::Class),
586 "interface" => Some(Keyword::Interface),
587 "type" => Some(Keyword::Type),
588 "enum" => Some(Keyword::Enum),
589 "namespace" => Some(Keyword::Namespace),
590 "module" => Some(Keyword::Module),
591 "import" => Some(Keyword::Import),
592 "export" => Some(Keyword::Export),
593 "from" => Some(Keyword::From),
594 "as" => Some(Keyword::As),
595 "default" => Some(Keyword::Default),
596 "if" => Some(Keyword::If),
597 "else" => Some(Keyword::Else),
598 "switch" => Some(Keyword::Switch),
599 "case" => Some(Keyword::Case),
600 "for" => Some(Keyword::For),
601 "while" => Some(Keyword::While),
602 "do" => Some(Keyword::Do),
603 "break" => Some(Keyword::Break),
604 "continue" => Some(Keyword::Continue),
605 "return" => Some(Keyword::Return),
606 "throw" => Some(Keyword::Throw),
607 "try" => Some(Keyword::Try),
608 "catch" => Some(Keyword::Catch),
609 "finally" => Some(Keyword::Finally),
610 "extends" => Some(Keyword::Extends),
611 "implements" => Some(Keyword::Implements),
612 "super" => Some(Keyword::Super),
613 "this" => Some(Keyword::This),
614 "new" => Some(Keyword::New),
615 "static" => Some(Keyword::Static),
616 "public" => Some(Keyword::Public),
617 "private" => Some(Keyword::Private),
618 "protected" => Some(Keyword::Protected),
619 "abstract" => Some(Keyword::Abstract),
620 "readonly" => Some(Keyword::Readonly),
621 "async" => Some(Keyword::Async),
622 "await" => Some(Keyword::Await),
623 "Promise" => Some(Keyword::Promise),
624 "any" => Some(Keyword::Any),
625 "unknown" => Some(Keyword::Unknown),
626 "never" => Some(Keyword::Never),
627 "void" => Some(Keyword::Void),
628 "null" => Some(Keyword::Null),
629 "undefined" => Some(Keyword::Undefined),
630 "boolean" => Some(Keyword::Boolean),
631 "number" => Some(Keyword::Number),
632 "string" => Some(Keyword::String),
633 "object" => Some(Keyword::Object),
634 "Array" => Some(Keyword::Array),
635 "true" => Some(Keyword::True),
636 "false" => Some(Keyword::False),
637 "in" => Some(Keyword::In),
638 "of" => Some(Keyword::Of),
639 "instanceof" => Some(Keyword::Instanceof),
640 "typeof" => Some(Keyword::Typeof),
641 "keyof" => Some(Keyword::Keyof),
642 "is" => Some(Keyword::Is),
643 "asserts" => Some(Keyword::Asserts),
644 "infer" => Some(Keyword::Infer),
645 "declare" => Some(Keyword::Declare),
646 "global" => Some(Keyword::Global),
647 _ => None,
648 }
649 }
650}