TypeScript_Rust_Compiler/
lexer.rs

//! Lexical analysis for TypeScript code
2
3use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6/// Token types for TypeScript
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9    // Literals
10    Number(f64),
11    String(String),
12    TemplateLiteral(String),
13    Boolean(bool),
14    Null,
15    Undefined,
16
17    // Identifiers and keywords
18    Identifier(String),
19    Keyword(Keyword),
20
21    // Operators
22    Plus,
23    Minus,
24    Multiply,
25    Divide,
26    Modulo,
27    Equal,
28    NotEqual,
29    StrictEqual,
30    StrictNotEqual,
31    LessThan,
32    GreaterThan,
33    LessEqual,
34    GreaterEqual,
35    And,
36    Or,
37    Not,
38    Assign,
39    PlusAssign,
40    MinusAssign,
41    MultiplyAssign,
42    DivideAssign,
43
44    // Delimiters
45    LeftParen,
46    RightParen,
47    LeftBrace,
48    RightBrace,
49    LeftBracket,
50    RightBracket,
51    Semicolon,
52    Comma,
53    Dot,
54    Colon,
55    QuestionMark,
56    Arrow,
57
58    // Type annotations
59    TypeAnnotation,
60    GenericStart,
61    GenericEnd,
62
63    // Special
64    Newline,
65    Whitespace,
66    Comment(String),
67    EOF,
68}
69
70/// TypeScript keywords
71#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub enum Keyword {
73    // Declarations
74    Let,
75    Const,
76    Var,
77    Function,
78    Class,
79    Interface,
80    Type,
81    Enum,
82    Namespace,
83    Module,
84    Import,
85    Export,
86    From,
87    As,
88    Default,
89
90    // Control flow
91    If,
92    Else,
93    Switch,
94    Case,
95    DefaultCase,
96    For,
97    While,
98    Do,
99    Break,
100    Continue,
101    Return,
102    Throw,
103    Try,
104    Catch,
105    Finally,
106
107    // OOP
108    Extends,
109    Implements,
110    Super,
111    This,
112    New,
113    Static,
114    Public,
115    Private,
116    Protected,
117    Abstract,
118    Readonly,
119
120    // Async
121    Async,
122    Await,
123    Promise,
124
125    // Types
126    Any,
127    Unknown,
128    Never,
129    Void,
130    Null,
131    Undefined,
132    Boolean,
133    Number,
134    String,
135    Object,
136    Array,
137    Tuple,
138    Union,
139    Intersection,
140    Literal,
141    Mapped,
142    Conditional,
143    Template,
144
145    // Utility types
146    Partial,
147    Required,
148    Pick,
149    Omit,
150    Record,
151    Exclude,
152    Extract,
153    NonNullable,
154    Parameters,
155    ReturnType,
156    InstanceType,
157    ThisParameterType,
158    OmitThisParameter,
159    ThisType,
160
161    // Other
162    True,
163    False,
164    In,
165    Of,
166    Instanceof,
167    Typeof,
168    Keyof,
169    Is,
170    Asserts,
171    Infer,
172    Declare,
173    Ambient,
174    Global,
175}
176
177/// Lexer for TypeScript code
178pub struct Lexer {
179    input: String,
180    position: usize,
181    line: usize,
182    column: usize,
183}
184
185impl Lexer {
186    /// Create a new lexer
187    pub fn new(input: String) -> Self {
188        Self {
189            input,
190            position: 0,
191            line: 1,
192            column: 1,
193        }
194    }
195
196    /// Tokenize the input string
197    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
198        let mut tokens = Vec::new();
199
200        while self.position < self.input.len() {
201            match self.next_token()? {
202                Some(token) => {
203                    println!("Token: {:?}", token);
204                    tokens.push(token);
205                }
206                None => break,
207            }
208        }
209
210        tokens.push(Token::EOF);
211        Ok(tokens)
212    }
213
214    /// Get the next token
215    fn next_token(&mut self) -> Result<Option<Token>> {
216        self.skip_whitespace();
217
218        if self.position >= self.input.len() {
219            return Ok(None);
220        }
221
222        let ch = self.current_char();
223        let token = match ch {
224            '+' => {
225                if self.peek_char() == Some('=') {
226                    self.advance();
227                    Ok(Some(Token::PlusAssign))
228                } else if self.peek_char() == Some('+') {
229                    self.advance();
230                    Ok(Some(Token::Plus)) // ++ operator
231                } else {
232                    Ok(Some(Token::Plus))
233                }
234            }
235            '-' => {
236                if self.peek_char() == Some('=') {
237                    self.advance();
238                    Ok(Some(Token::MinusAssign))
239                } else if self.peek_char() == Some('>') {
240                    self.advance();
241                    Ok(Some(Token::Arrow))
242                } else {
243                    Ok(Some(Token::Minus))
244                }
245            }
246            '*' => {
247                if self.peek_char() == Some('=') {
248                    self.advance();
249                    Ok(Some(Token::MultiplyAssign))
250                } else {
251                    Ok(Some(Token::Multiply))
252                }
253            }
254            '/' => {
255                if self.peek_char() == Some('=') {
256                    self.advance();
257                    Ok(Some(Token::DivideAssign))
258                } else if self.peek_char() == Some('/') {
259                    self.advance();
260                    self.skip_line_comment();
261                    Ok(None)
262                } else if self.peek_char() == Some('*') {
263                    self.advance();
264                    self.skip_block_comment();
265                    Ok(None)
266                } else {
267                    Ok(Some(Token::Divide))
268                }
269            }
270            '%' => Ok(Some(Token::Modulo)),
271            '=' => {
272                if self.peek_char() == Some('=') {
273                    self.advance();
274                    if self.peek_char() == Some('=') {
275                        self.advance();
276                        Ok(Some(Token::StrictEqual))
277                    } else {
278                        Ok(Some(Token::Equal))
279                    }
280                } else {
281                    Ok(Some(Token::Assign))
282                }
283            }
284            '!' => {
285                if self.peek_char() == Some('=') {
286                    self.advance();
287                    if self.peek_char() == Some('=') {
288                        self.advance();
289                        Ok(Some(Token::StrictNotEqual))
290                    } else {
291                        Ok(Some(Token::NotEqual))
292                    }
293                } else {
294                    Ok(Some(Token::Not))
295                }
296            }
297            '<' => {
298                if self.peek_char() == Some('=') {
299                    self.advance();
300                    Ok(Some(Token::LessEqual))
301                } else {
302                    Ok(Some(Token::LessThan))
303                }
304            }
305            '>' => {
306                if self.peek_char() == Some('=') {
307                    self.advance();
308                    Ok(Some(Token::GreaterEqual))
309                } else {
310                    Ok(Some(Token::GreaterThan))
311                }
312            }
313            '&' => {
314                if self.peek_char() == Some('&') {
315                    self.advance();
316                    Ok(Some(Token::And))
317                } else {
318                    return Err(CompilerError::parse_error(
319                        self.line,
320                        self.column,
321                        "Unexpected character: &",
322                    ));
323                }
324            }
325            '|' => {
326                if self.peek_char() == Some('|') {
327                    self.advance();
328                    Ok(Some(Token::Or))
329                } else {
330                    return Err(CompilerError::parse_error(
331                        self.line,
332                        self.column,
333                        "Unexpected character: |",
334                    ));
335                }
336            }
337            '(' => Ok(Some(Token::LeftParen)),
338            ')' => Ok(Some(Token::RightParen)),
339            '{' => Ok(Some(Token::LeftBrace)),
340            '}' => Ok(Some(Token::RightBrace)),
341            '[' => Ok(Some(Token::LeftBracket)),
342            ']' => Ok(Some(Token::RightBracket)),
343            ';' => Ok(Some(Token::Semicolon)),
344            ',' => Ok(Some(Token::Comma)),
345            '.' => Ok(Some(Token::Dot)),
346            ':' => Ok(Some(Token::Colon)),
347            '?' => Ok(Some(Token::QuestionMark)),
348            '"' | '\'' => Ok(self.parse_string()?),
349            '`' => Ok(self.parse_template_literal()?),
350            '0'..='9' => Ok(self.parse_number()?),
351            'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
352            _ => {
353                return Err(CompilerError::parse_error(
354                    self.line,
355                    self.column,
356                    format!("Unexpected character: {}", ch),
357                ));
358            }
359        };
360
361        // Only advance for simple tokens that don't manage position themselves
362        match ch {
363            'a'..='z' | 'A'..='Z' | '_' | '$' => {
364                // parse_identifier_or_keyword manages position itself
365            }
366            '0'..='9' => {
367                // parse_number manages position itself
368            }
369            '"' | '\'' => {
370                // parse_string manages position itself
371            }
372            _ => {
373                // Simple tokens need to advance
374                self.advance();
375            }
376        }
377        token
378    }
379
380    /// Get current character
381    fn current_char(&self) -> char {
382        self.input.chars().nth(self.position).unwrap_or('\0')
383    }
384
385    /// Peek at next character
386    fn peek_char(&self) -> Option<char> {
387        self.input.chars().nth(self.position + 1)
388    }
389
390    /// Advance position
391    fn advance(&mut self) {
392        if self.current_char() == '\n' {
393            self.line += 1;
394            self.column = 1;
395        } else {
396            self.column += 1;
397        }
398        self.position += 1;
399    }
400
401    /// Skip whitespace
402    fn skip_whitespace(&mut self) {
403        while self.position < self.input.len() {
404            let ch = self.current_char();
405            if ch.is_whitespace() {
406                self.advance();
407            } else if ch == '/' && self.peek_char() == Some('/') {
408                // Skip line comment
409                self.advance(); // skip first /
410                self.advance(); // skip second /
411                while self.position < self.input.len() && self.current_char() != '\n' {
412                    self.advance();
413                }
414            } else if ch == '/' && self.peek_char() == Some('*') {
415                // Skip block comment
416                self.advance(); // skip /
417                self.advance(); // skip *
418                while self.position < self.input.len() {
419                    if self.current_char() == '*' && self.peek_char() == Some('/') {
420                        self.advance(); // skip *
421                        self.advance(); // skip /
422                        break;
423                    }
424                    self.advance();
425                }
426            } else {
427                break;
428            }
429        }
430    }
431
432    /// Skip line comment
433    fn skip_line_comment(&mut self) -> Option<Token> {
434        while self.position < self.input.len() && self.current_char() != '\n' {
435            self.advance();
436        }
437        None
438    }
439
440    /// Skip block comment
441    fn skip_block_comment(&mut self) -> Option<Token> {
442        while self.position < self.input.len() {
443            if self.current_char() == '*' && self.peek_char() == Some('/') {
444                self.advance();
445                self.advance();
446                break;
447            }
448            self.advance();
449        }
450        None
451    }
452
453    /// Parse string literal
454    fn parse_string(&mut self) -> Result<Option<Token>> {
455        let quote = self.current_char();
456        let mut value = String::new();
457        self.advance();
458
459        while self.position < self.input.len() {
460            let ch = self.current_char();
461            if ch == quote {
462                self.advance();
463                return Ok(Some(Token::String(value)));
464            } else if ch == '\\' {
465                self.advance();
466                if self.position < self.input.len() {
467                    let escaped = self.current_char();
468                    value.push(match escaped {
469                        'n' => '\n',
470                        't' => '\t',
471                        'r' => '\r',
472                        '\\' => '\\',
473                        '"' => '"',
474                        '\'' => '\'',
475                        _ => escaped,
476                    });
477                    self.advance();
478                }
479            } else {
480                value.push(ch);
481                self.advance();
482            }
483        }
484
485        Err(CompilerError::parse_error(
486            self.line,
487            self.column,
488            "Unterminated string literal",
489        ))
490    }
491
492    /// Parse template literal
493    fn parse_template_literal(&mut self) -> Result<Option<Token>> {
494        let mut value = String::new();
495        self.advance(); // consume opening backtick
496
497        while self.position < self.input.len() {
498            let ch = self.current_char();
499            if ch == '`' {
500                self.advance();
501                return Ok(Some(Token::TemplateLiteral(value)));
502            } else if ch == '\\' {
503                self.advance();
504                if self.position < self.input.len() {
505                    let escaped = self.current_char();
506                    value.push(match escaped {
507                        'n' => '\n',
508                        't' => '\t',
509                        'r' => '\r',
510                        '\\' => '\\',
511                        '`' => '`',
512                        '$' => '$',
513                        _ => escaped,
514                    });
515                    self.advance();
516                }
517            } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
518                // Handle ${} interpolation - include the full ${} in the string for now
519                value.push('$');
520                self.advance();
521                if self.position < self.input.len() {
522                    value.push('{');
523                    self.advance();
524                    // Skip to closing brace
525                    while self.position < self.input.len() && self.current_char() != '}' {
526                        value.push(self.current_char());
527                        self.advance();
528                    }
529                    if self.position < self.input.len() {
530                        value.push('}');
531                        self.advance();
532                    }
533                }
534            } else {
535                value.push(ch);
536                self.advance();
537            }
538        }
539
540        Err(CompilerError::parse_error(
541            self.line,
542            self.column,
543            "Unterminated template literal",
544        ))
545    }
546
547    /// Parse number literal
548    fn parse_number(&mut self) -> Result<Option<Token>> {
549        let mut value = String::new();
550        let mut has_dot = false;
551
552        while self.position < self.input.len() {
553            let ch = self.current_char();
554            if ch.is_ascii_digit() {
555                value.push(ch);
556                self.advance();
557            } else if ch == '.' && !has_dot {
558                has_dot = true;
559                value.push(ch);
560                self.advance();
561            } else {
562                break;
563            }
564        }
565
566        let number: f64 = value.parse().map_err(|_| {
567            CompilerError::parse_error(self.line, self.column, "Invalid number literal")
568        })?;
569
570        Ok(Some(Token::Number(number)))
571    }
572
573    /// Parse identifier or keyword
574    fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
575        let mut value = String::new();
576
577        while self.position < self.input.len() {
578            let ch = self.current_char();
579            if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
580                value.push(ch);
581                self.advance();
582            } else {
583                break;
584            }
585        }
586
587        // Check if it's a keyword
588        if let Some(keyword) = self.parse_keyword(&value) {
589            Ok(Some(Token::Keyword(keyword)))
590        } else {
591            Ok(Some(Token::Identifier(value)))
592        }
593    }
594
595    /// Parse keyword from string
596    fn parse_keyword(&self, value: &str) -> Option<Keyword> {
597        match value {
598            "let" => Some(Keyword::Let),
599            "const" => Some(Keyword::Const),
600            "var" => Some(Keyword::Var),
601            "function" => Some(Keyword::Function),
602            "class" => Some(Keyword::Class),
603            "interface" => Some(Keyword::Interface),
604            "type" => Some(Keyword::Type),
605            "enum" => Some(Keyword::Enum),
606            "namespace" => Some(Keyword::Namespace),
607            "module" => Some(Keyword::Module),
608            "import" => Some(Keyword::Import),
609            "export" => Some(Keyword::Export),
610            "from" => Some(Keyword::From),
611            "as" => Some(Keyword::As),
612            "default" => Some(Keyword::Default),
613            "if" => Some(Keyword::If),
614            "else" => Some(Keyword::Else),
615            "switch" => Some(Keyword::Switch),
616            "case" => Some(Keyword::Case),
617            "for" => Some(Keyword::For),
618            "while" => Some(Keyword::While),
619            "do" => Some(Keyword::Do),
620            "break" => Some(Keyword::Break),
621            "continue" => Some(Keyword::Continue),
622            "return" => Some(Keyword::Return),
623            "throw" => Some(Keyword::Throw),
624            "try" => Some(Keyword::Try),
625            "catch" => Some(Keyword::Catch),
626            "finally" => Some(Keyword::Finally),
627            "extends" => Some(Keyword::Extends),
628            "implements" => Some(Keyword::Implements),
629            "super" => Some(Keyword::Super),
630            "this" => Some(Keyword::This),
631            "new" => Some(Keyword::New),
632            "static" => Some(Keyword::Static),
633            "public" => Some(Keyword::Public),
634            "private" => Some(Keyword::Private),
635            "protected" => Some(Keyword::Protected),
636            "abstract" => Some(Keyword::Abstract),
637            "readonly" => Some(Keyword::Readonly),
638            "async" => Some(Keyword::Async),
639            "await" => Some(Keyword::Await),
640            "Promise" => Some(Keyword::Promise),
641            "any" => Some(Keyword::Any),
642            "unknown" => Some(Keyword::Unknown),
643            "never" => Some(Keyword::Never),
644            "void" => Some(Keyword::Void),
645            "null" => Some(Keyword::Null),
646            "undefined" => Some(Keyword::Undefined),
647            "boolean" => Some(Keyword::Boolean),
648            "number" => Some(Keyword::Number),
649            "string" => Some(Keyword::String),
650            "object" => Some(Keyword::Object),
651            "Array" => Some(Keyword::Array),
652            "true" => Some(Keyword::True),
653            "false" => Some(Keyword::False),
654            "in" => Some(Keyword::In),
655            "of" => Some(Keyword::Of),
656            "instanceof" => Some(Keyword::Instanceof),
657            "typeof" => Some(Keyword::Typeof),
658            "keyof" => Some(Keyword::Keyof),
659            "is" => Some(Keyword::Is),
660            "asserts" => Some(Keyword::Asserts),
661            "infer" => Some(Keyword::Infer),
662            "declare" => Some(Keyword::Declare),
663            "global" => Some(Keyword::Global),
664            _ => None,
665        }
666    }
667}