1use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9 Number(f64),
11 String(String),
12 TemplateLiteral(String),
13 Boolean(bool),
14 Null,
15 Undefined,
16
17 Identifier(String),
19 Keyword(Keyword),
20
21 Plus,
23 Minus,
24 Multiply,
25 Divide,
26 Modulo,
27 Equal,
28 NotEqual,
29 StrictEqual,
30 StrictNotEqual,
31 LessThan,
32 GreaterThan,
33 LessEqual,
34 GreaterEqual,
35 And,
36 Or,
37 Not,
38 Assign,
39 PlusAssign,
40 MinusAssign,
41 MultiplyAssign,
42 DivideAssign,
43
44 LeftParen,
46 RightParen,
47 LeftBrace,
48 RightBrace,
49 LeftBracket,
50 RightBracket,
51 Semicolon,
52 Comma,
53 Dot,
54 Colon,
55 QuestionMark,
56 Arrow,
57
58 TypeAnnotation,
60 GenericStart,
61 GenericEnd,
62
63 Newline,
65 Whitespace,
66 Comment(String),
67 EOF,
68}
69
70#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub enum Keyword {
73 Let,
75 Const,
76 Var,
77 Function,
78 Class,
79 Interface,
80 Type,
81 Enum,
82 Namespace,
83 Module,
84 Import,
85 Export,
86 From,
87 As,
88 Default,
89
90 If,
92 Else,
93 Switch,
94 Case,
95 DefaultCase,
96 For,
97 While,
98 Do,
99 Break,
100 Continue,
101 Return,
102 Throw,
103 Try,
104 Catch,
105 Finally,
106
107 Extends,
109 Implements,
110 Super,
111 This,
112 New,
113 Static,
114 Public,
115 Private,
116 Protected,
117 Abstract,
118 Readonly,
119
120 Async,
122 Await,
123 Promise,
124
125 Any,
127 Unknown,
128 Never,
129 Void,
130 Null,
131 Undefined,
132 Boolean,
133 Number,
134 String,
135 Object,
136 Array,
137 Tuple,
138 Union,
139 Intersection,
140 Literal,
141 Mapped,
142 Conditional,
143 Template,
144
145 Partial,
147 Required,
148 Pick,
149 Omit,
150 Record,
151 Exclude,
152 Extract,
153 NonNullable,
154 Parameters,
155 ReturnType,
156 InstanceType,
157 ThisParameterType,
158 OmitThisParameter,
159 ThisType,
160
161 True,
163 False,
164 In,
165 Of,
166 Instanceof,
167 Typeof,
168 Keyof,
169 Is,
170 Asserts,
171 Infer,
172 Declare,
173 Ambient,
174 Global,
175}
176
/// Turns source text into a flat list of `Token`s.
///
/// Construct one with `Lexer::new` and drive it with `Lexer::tokenize`.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The complete source text being scanned.
    input: String,
    /// Offset of the next unread character in `input`.
    position: usize,
    /// 1-based line of the next unread character.
    line: usize,
    /// 1-based column of the next unread character.
    column: usize,
}
184
185impl Lexer {
186 pub fn new(input: String) -> Self {
188 Self {
189 input,
190 position: 0,
191 line: 1,
192 column: 1,
193 }
194 }
195
196 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
198 let mut tokens = Vec::new();
199
200 while self.position < self.input.len() {
201 match self.next_token()? {
202 Some(token) => {
203 println!("Token: {:?}", token);
204 tokens.push(token);
205 },
206 None => break,
207 }
208 }
209
210 tokens.push(Token::EOF);
211 Ok(tokens)
212 }
213
214 fn next_token(&mut self) -> Result<Option<Token>> {
216 self.skip_whitespace();
217
218 if self.position >= self.input.len() {
219 return Ok(None);
220 }
221
222 let ch = self.current_char();
223 let token = match ch {
224 '+' => {
225 if self.peek_char() == Some('=') {
226 self.advance();
227 Ok(Some(Token::PlusAssign))
228 } else if self.peek_char() == Some('+') {
229 self.advance();
230 Ok(Some(Token::Plus)) } else {
232 Ok(Some(Token::Plus))
233 }
234 }
235 '-' => {
236 if self.peek_char() == Some('=') {
237 self.advance();
238 Ok(Some(Token::MinusAssign))
239 } else if self.peek_char() == Some('>') {
240 self.advance();
241 Ok(Some(Token::Arrow))
242 } else {
243 Ok(Some(Token::Minus))
244 }
245 }
246 '*' => {
247 if self.peek_char() == Some('=') {
248 self.advance();
249 Ok(Some(Token::MultiplyAssign))
250 } else {
251 Ok(Some(Token::Multiply))
252 }
253 }
254 '/' => {
255 if self.peek_char() == Some('=') {
256 self.advance();
257 Ok(Some(Token::DivideAssign))
258 } else if self.peek_char() == Some('/') {
259 self.advance();
260 self.skip_line_comment();
261 Ok(None)
262 } else if self.peek_char() == Some('*') {
263 self.advance();
264 self.skip_block_comment();
265 Ok(None)
266 } else {
267 Ok(Some(Token::Divide))
268 }
269 }
270 '%' => Ok(Some(Token::Modulo)),
271 '=' => {
272 if self.peek_char() == Some('=') {
273 self.advance();
274 if self.peek_char() == Some('=') {
275 self.advance();
276 Ok(Some(Token::StrictEqual))
277 } else {
278 Ok(Some(Token::Equal))
279 }
280 } else {
281 Ok(Some(Token::Assign))
282 }
283 }
284 '!' => {
285 if self.peek_char() == Some('=') {
286 self.advance();
287 if self.peek_char() == Some('=') {
288 self.advance();
289 Ok(Some(Token::StrictNotEqual))
290 } else {
291 Ok(Some(Token::NotEqual))
292 }
293 } else {
294 Ok(Some(Token::Not))
295 }
296 }
297 '<' => {
298 if self.peek_char() == Some('=') {
299 self.advance();
300 Ok(Some(Token::LessEqual))
301 } else {
302 Ok(Some(Token::LessThan))
303 }
304 }
305 '>' => {
306 if self.peek_char() == Some('=') {
307 self.advance();
308 Ok(Some(Token::GreaterEqual))
309 } else {
310 Ok(Some(Token::GreaterThan))
311 }
312 }
313 '&' => {
314 if self.peek_char() == Some('&') {
315 self.advance();
316 Ok(Some(Token::And))
317 } else {
318 return Err(CompilerError::parse_error(
319 self.line,
320 self.column,
321 "Unexpected character: &",
322 ));
323 }
324 }
325 '|' => {
326 if self.peek_char() == Some('|') {
327 self.advance();
328 Ok(Some(Token::Or))
329 } else {
330 return Err(CompilerError::parse_error(
331 self.line,
332 self.column,
333 "Unexpected character: |",
334 ));
335 }
336 }
337 '(' => Ok(Some(Token::LeftParen)),
338 ')' => Ok(Some(Token::RightParen)),
339 '{' => Ok(Some(Token::LeftBrace)),
340 '}' => Ok(Some(Token::RightBrace)),
341 '[' => Ok(Some(Token::LeftBracket)),
342 ']' => Ok(Some(Token::RightBracket)),
343 ';' => Ok(Some(Token::Semicolon)),
344 ',' => Ok(Some(Token::Comma)),
345 '.' => Ok(Some(Token::Dot)),
346 ':' => Ok(Some(Token::Colon)),
347 '?' => Ok(Some(Token::QuestionMark)),
348 '"' | '\'' => Ok(self.parse_string()?),
349 '`' => Ok(self.parse_template_literal()?),
350 '0'..='9' => Ok(self.parse_number()?),
351 'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
352 _ => {
353 return Err(CompilerError::parse_error(
354 self.line,
355 self.column,
356 format!("Unexpected character: {}", ch),
357 ));
358 }
359 };
360
361 match ch {
363 'a'..='z' | 'A'..='Z' | '_' | '$' => {
364 }
366 '0'..='9' => {
367 }
369 '"' | '\'' => {
370 }
372 _ => {
373 self.advance();
375 }
376 }
377 token
378 }
379
380 fn current_char(&self) -> char {
382 self.input.chars().nth(self.position).unwrap_or('\0')
383 }
384
385 fn peek_char(&self) -> Option<char> {
387 self.input.chars().nth(self.position + 1)
388 }
389
390 fn advance(&mut self) {
392 if self.current_char() == '\n' {
393 self.line += 1;
394 self.column = 1;
395 } else {
396 self.column += 1;
397 }
398 self.position += 1;
399 }
400
401 fn skip_whitespace(&mut self) {
403 while self.position < self.input.len() {
404 let ch = self.current_char();
405 if ch.is_whitespace() {
406 self.advance();
407 } else {
408 break;
409 }
410 }
411 }
412
413 fn skip_line_comment(&mut self) -> Option<Token> {
415 while self.position < self.input.len() && self.current_char() != '\n' {
416 self.advance();
417 }
418 None
419 }
420
421 fn skip_block_comment(&mut self) -> Option<Token> {
423 while self.position < self.input.len() {
424 if self.current_char() == '*' && self.peek_char() == Some('/') {
425 self.advance();
426 self.advance();
427 break;
428 }
429 self.advance();
430 }
431 None
432 }
433
434 fn parse_string(&mut self) -> Result<Option<Token>> {
436 let quote = self.current_char();
437 let mut value = String::new();
438 self.advance();
439
440 while self.position < self.input.len() {
441 let ch = self.current_char();
442 if ch == quote {
443 self.advance();
444 return Ok(Some(Token::String(value)));
445 } else if ch == '\\' {
446 self.advance();
447 if self.position < self.input.len() {
448 let escaped = self.current_char();
449 value.push(match escaped {
450 'n' => '\n',
451 't' => '\t',
452 'r' => '\r',
453 '\\' => '\\',
454 '"' => '"',
455 '\'' => '\'',
456 _ => escaped,
457 });
458 self.advance();
459 }
460 } else {
461 value.push(ch);
462 self.advance();
463 }
464 }
465
466 Err(CompilerError::parse_error(
467 self.line,
468 self.column,
469 "Unterminated string literal",
470 ))
471 }
472
473 fn parse_template_literal(&mut self) -> Result<Option<Token>> {
475 let mut value = String::new();
476 self.advance(); while self.position < self.input.len() {
479 let ch = self.current_char();
480 if ch == '`' {
481 self.advance();
482 return Ok(Some(Token::TemplateLiteral(value)));
483 } else if ch == '\\' {
484 self.advance();
485 if self.position < self.input.len() {
486 let escaped = self.current_char();
487 value.push(match escaped {
488 'n' => '\n',
489 't' => '\t',
490 'r' => '\r',
491 '\\' => '\\',
492 '`' => '`',
493 '$' => '$',
494 _ => escaped,
495 });
496 self.advance();
497 }
498 } else {
499 value.push(ch);
500 self.advance();
501 }
502 }
503
504 Err(CompilerError::parse_error(
505 self.line,
506 self.column,
507 "Unterminated template literal",
508 ))
509 }
510
511 fn parse_number(&mut self) -> Result<Option<Token>> {
513 let mut value = String::new();
514 let mut has_dot = false;
515
516 while self.position < self.input.len() {
517 let ch = self.current_char();
518 if ch.is_ascii_digit() {
519 value.push(ch);
520 self.advance();
521 } else if ch == '.' && !has_dot {
522 has_dot = true;
523 value.push(ch);
524 self.advance();
525 } else {
526 break;
527 }
528 }
529
530 let number: f64 = value.parse().map_err(|_| {
531 CompilerError::parse_error(self.line, self.column, "Invalid number literal")
532 })?;
533
534 Ok(Some(Token::Number(number)))
535 }
536
537 fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
539 let mut value = String::new();
540
541 while self.position < self.input.len() {
542 let ch = self.current_char();
543 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
544 value.push(ch);
545 self.advance();
546 } else {
547 break;
548 }
549 }
550
551 if let Some(keyword) = self.parse_keyword(&value) {
553 Ok(Some(Token::Keyword(keyword)))
554 } else {
555 Ok(Some(Token::Identifier(value)))
556 }
557 }
558
559 fn parse_keyword(&self, value: &str) -> Option<Keyword> {
561 match value {
562 "let" => Some(Keyword::Let),
563 "const" => Some(Keyword::Const),
564 "var" => Some(Keyword::Var),
565 "function" => Some(Keyword::Function),
566 "class" => Some(Keyword::Class),
567 "interface" => Some(Keyword::Interface),
568 "type" => Some(Keyword::Type),
569 "enum" => Some(Keyword::Enum),
570 "namespace" => Some(Keyword::Namespace),
571 "module" => Some(Keyword::Module),
572 "import" => Some(Keyword::Import),
573 "export" => Some(Keyword::Export),
574 "from" => Some(Keyword::From),
575 "as" => Some(Keyword::As),
576 "default" => Some(Keyword::Default),
577 "if" => Some(Keyword::If),
578 "else" => Some(Keyword::Else),
579 "switch" => Some(Keyword::Switch),
580 "case" => Some(Keyword::Case),
581 "for" => Some(Keyword::For),
582 "while" => Some(Keyword::While),
583 "do" => Some(Keyword::Do),
584 "break" => Some(Keyword::Break),
585 "continue" => Some(Keyword::Continue),
586 "return" => Some(Keyword::Return),
587 "throw" => Some(Keyword::Throw),
588 "try" => Some(Keyword::Try),
589 "catch" => Some(Keyword::Catch),
590 "finally" => Some(Keyword::Finally),
591 "extends" => Some(Keyword::Extends),
592 "implements" => Some(Keyword::Implements),
593 "super" => Some(Keyword::Super),
594 "this" => Some(Keyword::This),
595 "new" => Some(Keyword::New),
596 "static" => Some(Keyword::Static),
597 "public" => Some(Keyword::Public),
598 "private" => Some(Keyword::Private),
599 "protected" => Some(Keyword::Protected),
600 "abstract" => Some(Keyword::Abstract),
601 "readonly" => Some(Keyword::Readonly),
602 "async" => Some(Keyword::Async),
603 "await" => Some(Keyword::Await),
604 "Promise" => Some(Keyword::Promise),
605 "any" => Some(Keyword::Any),
606 "unknown" => Some(Keyword::Unknown),
607 "never" => Some(Keyword::Never),
608 "void" => Some(Keyword::Void),
609 "null" => Some(Keyword::Null),
610 "undefined" => Some(Keyword::Undefined),
611 "boolean" => Some(Keyword::Boolean),
612 "number" => Some(Keyword::Number),
613 "string" => Some(Keyword::String),
614 "object" => Some(Keyword::Object),
615 "Array" => Some(Keyword::Array),
616 "true" => Some(Keyword::True),
617 "false" => Some(Keyword::False),
618 "in" => Some(Keyword::In),
619 "of" => Some(Keyword::Of),
620 "instanceof" => Some(Keyword::Instanceof),
621 "typeof" => Some(Keyword::Typeof),
622 "keyof" => Some(Keyword::Keyof),
623 "is" => Some(Keyword::Is),
624 "asserts" => Some(Keyword::Asserts),
625 "infer" => Some(Keyword::Infer),
626 "declare" => Some(Keyword::Declare),
627 "global" => Some(Keyword::Global),
628 _ => None,
629 }
630 }
631}