TypeScript_Rust_Compiler/
lexer.rs

1//! Lexical analysis for TypeScript code
2
3use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6/// Token types for TypeScript
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9    // Literals
10    Number(f64),
11    String(String),
12    TemplateLiteral(String),
13    Boolean(bool),
14    Null,
15    Undefined,
16
17    // Identifiers and keywords
18    Identifier(String),
19    Keyword(Keyword),
20
21    // Operators
22    Plus,
23    Minus,
24    Multiply,
25    Divide,
26    Modulo,
27    Equal,
28    NotEqual,
29    StrictEqual,
30    StrictNotEqual,
31    LessThan,
32    GreaterThan,
33    LessEqual,
34    GreaterEqual,
35    And,
36    Or,
37    Not,
38    Assign,
39    PlusAssign,
40    MinusAssign,
41    MultiplyAssign,
42    DivideAssign,
43    Union, // |
44    Intersection, // &
45
46    // Delimiters
47    LeftParen,
48    RightParen,
49    LeftBrace,
50    RightBrace,
51    LeftBracket,
52    RightBracket,
53    Semicolon,
54    Comma,
55    Dot,
56    Colon,
57    QuestionMark,
58    Arrow,
59
60    // Type annotations
61    TypeAnnotation,
62    GenericStart,
63    GenericEnd,
64
65    // Special
66    Newline,
67    Whitespace,
68    Comment(String),
69    EOF,
70}
71
72/// TypeScript keywords
73#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum Keyword {
75    // Declarations
76    Let,
77    Const,
78    Var,
79    Function,
80    Class,
81    Interface,
82    Type,
83    Enum,
84    Namespace,
85    Module,
86    Import,
87    Export,
88    From,
89    As,
90    Default,
91
92    // Control flow
93    If,
94    Else,
95    Switch,
96    Case,
97    DefaultCase,
98    For,
99    While,
100    Do,
101    Break,
102    Continue,
103    Return,
104    Throw,
105    Try,
106    Catch,
107    Finally,
108
109    // OOP
110    Extends,
111    Implements,
112    Super,
113    This,
114    New,
115    Static,
116    Public,
117    Private,
118    Protected,
119    Abstract,
120    Readonly,
121
122    // Async
123    Async,
124    Await,
125    Promise,
126
127    // Types
128    Any,
129    Unknown,
130    Never,
131    Void,
132    Null,
133    Undefined,
134    Boolean,
135    Number,
136    String,
137    Object,
138    Array,
139    Tuple,
140    Union,
141    Intersection,
142    Literal,
143    Mapped,
144    Conditional,
145    Template,
146
147    // Utility types
148    Partial,
149    Required,
150    Pick,
151    Omit,
152    Record,
153    Exclude,
154    Extract,
155    NonNullable,
156    Parameters,
157    ReturnType,
158    InstanceType,
159    ThisParameterType,
160    OmitThisParameter,
161    ThisType,
162
163    // Other
164    True,
165    False,
166    In,
167    Of,
168    Instanceof,
169    Typeof,
170    Keyof,
171    Key,
172    Is,
173    Asserts,
174    Infer,
175    Declare,
176    Ambient,
177    Global,
178}
179
/// Lexer that turns TypeScript source text into a stream of tokens.
///
/// Holds the source being scanned together with a cursor and the
/// line/column of that cursor, which are used when reporting errors.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// Complete source text being tokenized.
    input: String,
    /// Offset of the current character within `input`.
    position: usize,
    /// Line of the current character; starts at 1.
    line: usize,
    /// Column of the current character within its line; starts at 1.
    column: usize,
}
187
188impl Lexer {
189    /// Create a new lexer
190    pub fn new(input: String) -> Self {
191        Self {
192            input,
193            position: 0,
194            line: 1,
195            column: 1,
196        }
197    }
198
199    /// Tokenize the input string
200    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
201        let mut tokens = Vec::new();
202
203        while self.position < self.input.len() {
204            match self.next_token()? {
205                Some(token) => {
206                    println!("Token: {:?}", token);
207                    tokens.push(token);
208                }
209                None => break,
210            }
211        }
212
213        tokens.push(Token::EOF);
214        Ok(tokens)
215    }
216
217    /// Get the next token
218    fn next_token(&mut self) -> Result<Option<Token>> {
219        self.skip_whitespace();
220
221        if self.position >= self.input.len() {
222            return Ok(None);
223        }
224
225        let ch = self.current_char();
226        let token = match ch {
227            '+' => {
228                if self.peek_char() == Some('=') {
229                    self.advance();
230                    Ok(Some(Token::PlusAssign))
231                } else if self.peek_char() == Some('+') {
232                    self.advance();
233                    Ok(Some(Token::Plus)) // ++ operator
234                } else {
235                    Ok(Some(Token::Plus))
236                }
237            }
238            '-' => {
239                if self.peek_char() == Some('=') {
240                    self.advance();
241                    Ok(Some(Token::MinusAssign))
242                } else if self.peek_char() == Some('>') {
243                    self.advance();
244                    Ok(Some(Token::Arrow))
245                } else {
246                    Ok(Some(Token::Minus))
247                }
248            }
249            '*' => {
250                if self.peek_char() == Some('=') {
251                    self.advance();
252                    Ok(Some(Token::MultiplyAssign))
253                } else {
254                    Ok(Some(Token::Multiply))
255                }
256            }
257            '/' => {
258                if self.peek_char() == Some('=') {
259                    self.advance();
260                    Ok(Some(Token::DivideAssign))
261                } else if self.peek_char() == Some('/') {
262                    self.advance();
263                    self.skip_line_comment();
264                    Ok(None)
265                } else if self.peek_char() == Some('*') {
266                    self.advance();
267                    self.skip_block_comment();
268                    Ok(None)
269                } else {
270                    Ok(Some(Token::Divide))
271                }
272            }
273            '%' => Ok(Some(Token::Modulo)),
274            '=' => {
275                if self.peek_char() == Some('=') {
276                    self.advance();
277                    if self.peek_char() == Some('=') {
278                        self.advance();
279                        Ok(Some(Token::StrictEqual))
280                    } else {
281                        Ok(Some(Token::Equal))
282                    }
283                } else {
284                    Ok(Some(Token::Assign))
285                }
286            }
287            '!' => {
288                if self.peek_char() == Some('=') {
289                    self.advance();
290                    if self.peek_char() == Some('=') {
291                        self.advance();
292                        Ok(Some(Token::StrictNotEqual))
293                    } else {
294                        Ok(Some(Token::NotEqual))
295                    }
296                } else {
297                    Ok(Some(Token::Not))
298                }
299            }
300            '<' => {
301                if self.peek_char() == Some('=') {
302                    self.advance();
303                    Ok(Some(Token::LessEqual))
304                } else {
305                    Ok(Some(Token::LessThan))
306                }
307            }
308            '>' => {
309                if self.peek_char() == Some('=') {
310                    self.advance();
311                    Ok(Some(Token::GreaterEqual))
312                } else {
313                    Ok(Some(Token::GreaterThan))
314                }
315            }
316            '&' => {
317                if self.peek_char() == Some('&') {
318                    self.advance();
319                    Ok(Some(Token::And))
320                } else {
321                    Ok(Some(Token::Intersection))
322                }
323            }
324            '|' => {
325                if self.peek_char() == Some('|') {
326                    self.advance();
327                    Ok(Some(Token::Or))
328                } else {
329                    Ok(Some(Token::Union))
330                }
331            }
332            '(' => Ok(Some(Token::LeftParen)),
333            ')' => Ok(Some(Token::RightParen)),
334            '{' => Ok(Some(Token::LeftBrace)),
335            '}' => Ok(Some(Token::RightBrace)),
336            '[' => Ok(Some(Token::LeftBracket)),
337            ']' => Ok(Some(Token::RightBracket)),
338            ';' => Ok(Some(Token::Semicolon)),
339            ',' => Ok(Some(Token::Comma)),
340            '.' => Ok(Some(Token::Dot)),
341            ':' => Ok(Some(Token::Colon)),
342            '?' => Ok(Some(Token::QuestionMark)),
343            '"' | '\'' => Ok(self.parse_string()?),
344            '`' => Ok(self.parse_template_literal()?),
345            '0'..='9' => Ok(self.parse_number()?),
346            'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
347            _ => {
348                return Err(CompilerError::parse_error(
349                    self.line,
350                    self.column,
351                    format!("Unexpected character: {}", ch),
352                ));
353            }
354        };
355
356        // Only advance for simple tokens that don't manage position themselves
357        match ch {
358            'a'..='z' | 'A'..='Z' | '_' | '$' => {
359                // parse_identifier_or_keyword manages position itself
360            }
361            '0'..='9' => {
362                // parse_number manages position itself
363            }
364            '"' | '\'' => {
365                // parse_string manages position itself
366            }
367            _ => {
368                // Simple tokens need to advance
369                self.advance();
370            }
371        }
372        token
373    }
374
375    /// Get current character
376    fn current_char(&self) -> char {
377        self.input.chars().nth(self.position).unwrap_or('\0')
378    }
379
380    /// Peek at next character
381    fn peek_char(&self) -> Option<char> {
382        self.input.chars().nth(self.position + 1)
383    }
384
385    /// Advance position
386    fn advance(&mut self) {
387        if self.current_char() == '\n' {
388            self.line += 1;
389            self.column = 1;
390        } else {
391            self.column += 1;
392        }
393        self.position += 1;
394    }
395
396    /// Skip whitespace
397    fn skip_whitespace(&mut self) {
398        while self.position < self.input.len() {
399            let ch = self.current_char();
400            if ch.is_whitespace() {
401                self.advance();
402            } else if ch == '/' && self.peek_char() == Some('/') {
403                // Skip line comment
404                self.advance(); // skip first /
405                self.advance(); // skip second /
406                while self.position < self.input.len() && self.current_char() != '\n' {
407                    self.advance();
408                }
409            } else if ch == '/' && self.peek_char() == Some('*') {
410                // Skip block comment
411                self.advance(); // skip /
412                self.advance(); // skip *
413                while self.position < self.input.len() {
414                    if self.current_char() == '*' && self.peek_char() == Some('/') {
415                        self.advance(); // skip *
416                        self.advance(); // skip /
417                        break;
418                    }
419                    self.advance();
420                }
421            } else {
422                break;
423            }
424        }
425    }
426
427    /// Skip line comment
428    fn skip_line_comment(&mut self) -> Option<Token> {
429        while self.position < self.input.len() && self.current_char() != '\n' {
430            self.advance();
431        }
432        None
433    }
434
435    /// Skip block comment
436    fn skip_block_comment(&mut self) -> Option<Token> {
437        while self.position < self.input.len() {
438            if self.current_char() == '*' && self.peek_char() == Some('/') {
439                self.advance();
440                self.advance();
441                break;
442            }
443            self.advance();
444        }
445        None
446    }
447
448    /// Parse string literal
449    fn parse_string(&mut self) -> Result<Option<Token>> {
450        let quote = self.current_char();
451        let mut value = String::new();
452        self.advance();
453
454        while self.position < self.input.len() {
455            let ch = self.current_char();
456            if ch == quote {
457                self.advance();
458                return Ok(Some(Token::String(value)));
459            } else if ch == '\\' {
460                self.advance();
461                if self.position < self.input.len() {
462                    let escaped = self.current_char();
463                    value.push(match escaped {
464                        'n' => '\n',
465                        't' => '\t',
466                        'r' => '\r',
467                        '\\' => '\\',
468                        '"' => '"',
469                        '\'' => '\'',
470                        _ => escaped,
471                    });
472                    self.advance();
473                }
474            } else {
475                value.push(ch);
476                self.advance();
477            }
478        }
479
480        Err(CompilerError::parse_error(
481            self.line,
482            self.column,
483            "Unterminated string literal",
484        ))
485    }
486
487    /// Parse template literal
488    fn parse_template_literal(&mut self) -> Result<Option<Token>> {
489        let mut value = String::new();
490        self.advance(); // consume opening backtick
491
492        while self.position < self.input.len() {
493            let ch = self.current_char();
494            if ch == '`' {
495                self.advance();
496                return Ok(Some(Token::TemplateLiteral(value)));
497            } else if ch == '\\' {
498                self.advance();
499                if self.position < self.input.len() {
500                    let escaped = self.current_char();
501                    value.push(match escaped {
502                        'n' => '\n',
503                        't' => '\t',
504                        'r' => '\r',
505                        '\\' => '\\',
506                        '`' => '`',
507                        '$' => '$',
508                        _ => escaped,
509                    });
510                    self.advance();
511                }
512            } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
513                // Handle ${} interpolation - include the full ${} in the string for now
514                value.push('$');
515                self.advance();
516                if self.position < self.input.len() {
517                    value.push('{');
518                    self.advance();
519                    // Skip to closing brace
520                    while self.position < self.input.len() && self.current_char() != '}' {
521                        value.push(self.current_char());
522                        self.advance();
523                    }
524                    if self.position < self.input.len() {
525                        value.push('}');
526                        self.advance();
527                    }
528                }
529            } else {
530                value.push(ch);
531                self.advance();
532            }
533        }
534
535        Err(CompilerError::parse_error(
536            self.line,
537            self.column,
538            "Unterminated template literal",
539        ))
540    }
541
542    /// Parse number literal
543    fn parse_number(&mut self) -> Result<Option<Token>> {
544        let mut value = String::new();
545        let mut has_dot = false;
546
547        while self.position < self.input.len() {
548            let ch = self.current_char();
549            if ch.is_ascii_digit() {
550                value.push(ch);
551                self.advance();
552            } else if ch == '.' && !has_dot {
553                has_dot = true;
554                value.push(ch);
555                self.advance();
556            } else {
557                break;
558            }
559        }
560
561        let number: f64 = value.parse().map_err(|_| {
562            CompilerError::parse_error(self.line, self.column, "Invalid number literal")
563        })?;
564
565        Ok(Some(Token::Number(number)))
566    }
567
568    /// Parse identifier or keyword
569    fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
570        let mut value = String::new();
571
572        while self.position < self.input.len() {
573            let ch = self.current_char();
574            if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
575                value.push(ch);
576                self.advance();
577            } else {
578                break;
579            }
580        }
581
582        // Check if it's a boolean literal
583        if value == "true" {
584            Ok(Some(Token::Boolean(true)))
585        } else if value == "false" {
586            Ok(Some(Token::Boolean(false)))
587        } else if let Some(keyword) = self.parse_keyword(&value) {
588            Ok(Some(Token::Keyword(keyword)))
589        } else {
590            Ok(Some(Token::Identifier(value)))
591        }
592    }
593
594    /// Parse keyword from string
595    fn parse_keyword(&self, value: &str) -> Option<Keyword> {
596        match value {
597            "let" => Some(Keyword::Let),
598            "const" => Some(Keyword::Const),
599            "var" => Some(Keyword::Var),
600            "function" => Some(Keyword::Function),
601            "class" => Some(Keyword::Class),
602            "interface" => Some(Keyword::Interface),
603            "type" => Some(Keyword::Type),
604            "enum" => Some(Keyword::Enum),
605            "namespace" => Some(Keyword::Namespace),
606            "module" => Some(Keyword::Module),
607            "import" => Some(Keyword::Import),
608            "export" => Some(Keyword::Export),
609            "from" => Some(Keyword::From),
610            "as" => Some(Keyword::As),
611            "default" => Some(Keyword::Default),
612            "if" => Some(Keyword::If),
613            "else" => Some(Keyword::Else),
614            "switch" => Some(Keyword::Switch),
615            "case" => Some(Keyword::Case),
616            "for" => Some(Keyword::For),
617            "while" => Some(Keyword::While),
618            "do" => Some(Keyword::Do),
619            "break" => Some(Keyword::Break),
620            "continue" => Some(Keyword::Continue),
621            "return" => Some(Keyword::Return),
622            "throw" => Some(Keyword::Throw),
623            "try" => Some(Keyword::Try),
624            "catch" => Some(Keyword::Catch),
625            "finally" => Some(Keyword::Finally),
626            "extends" => Some(Keyword::Extends),
627            "implements" => Some(Keyword::Implements),
628            "super" => Some(Keyword::Super),
629            "this" => Some(Keyword::This),
630            "new" => Some(Keyword::New),
631            "static" => Some(Keyword::Static),
632            "public" => Some(Keyword::Public),
633            "private" => Some(Keyword::Private),
634            "protected" => Some(Keyword::Protected),
635            "abstract" => Some(Keyword::Abstract),
636            "readonly" => Some(Keyword::Readonly),
637            "async" => Some(Keyword::Async),
638            "await" => Some(Keyword::Await),
639            "Promise" => Some(Keyword::Promise),
640            "any" => Some(Keyword::Any),
641            "unknown" => Some(Keyword::Unknown),
642            "never" => Some(Keyword::Never),
643            "void" => Some(Keyword::Void),
644            "null" => Some(Keyword::Null),
645            "undefined" => Some(Keyword::Undefined),
646            "boolean" => Some(Keyword::Boolean),
647            "number" => Some(Keyword::Number),
648            "string" => Some(Keyword::String),
649            "object" => Some(Keyword::Object),
650            "Array" => Some(Keyword::Array),
651            "true" => Some(Keyword::True),
652            "false" => Some(Keyword::False),
653            "in" => Some(Keyword::In),
654            "of" => Some(Keyword::Of),
655            "instanceof" => Some(Keyword::Instanceof),
656            "typeof" => Some(Keyword::Typeof),
657            "keyof" => Some(Keyword::Keyof),
658            "key" => Some(Keyword::Key),
659            "is" => Some(Keyword::Is),
660            "asserts" => Some(Keyword::Asserts),
661            "infer" => Some(Keyword::Infer),
662            "declare" => Some(Keyword::Declare),
663            "global" => Some(Keyword::Global),
664            _ => None,
665        }
666    }
667}