TypeScript_Rust_Compiler/
lexer.rs

//! Lexical analysis for TypeScript code
2
3use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6/// Token types for TypeScript
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9    // Literals
10    Number(f64),
11    String(String),
12    TemplateLiteral(String),
13    Boolean(bool),
14    Null,
15    Undefined,
16
17    // Identifiers and keywords
18    Identifier(String),
19    Keyword(Keyword),
20
21    // Operators
22    Plus,
23    Minus,
24    Multiply,
25    Divide,
26    Modulo,
27    Equal,
28    NotEqual,
29    StrictEqual,
30    StrictNotEqual,
31    LessThan,
32    GreaterThan,
33    LessEqual,
34    GreaterEqual,
35    And,
36    Or,
37    Not,
38    Assign,
39    Arrow, // =>
40    PlusAssign,
41    MinusAssign,
42    MultiplyAssign,
43    DivideAssign,
44    Union, // |
45    Intersection, // &
46
47    // Delimiters
48    LeftParen,
49    RightParen,
50    LeftBrace,
51    RightBrace,
52    LeftBracket,
53    RightBracket,
54    Semicolon,
55    Comma,
56    Dot,
57    Colon,
58    QuestionMark,
59
60    // Type annotations
61    TypeAnnotation,
62    GenericStart,
63    GenericEnd,
64
65    // Special
66    Newline,
67    Whitespace,
68    Comment(String),
69    EOF,
70}
71
72/// TypeScript keywords
73#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum Keyword {
75    // Declarations
76    Let,
77    Const,
78    Var,
79    Function,
80    Class,
81    Interface,
82    Type,
83    Enum,
84    Namespace,
85    Module,
86    Import,
87    Export,
88    From,
89    As,
90    Default,
91
92    // Control flow
93    If,
94    Else,
95    Switch,
96    Case,
97    DefaultCase,
98    For,
99    While,
100    Do,
101    Break,
102    Continue,
103    Return,
104    Throw,
105    Try,
106    Catch,
107    Finally,
108
109    // OOP
110    Extends,
111    Implements,
112    Super,
113    This,
114    New,
115    Static,
116    Public,
117    Private,
118    Protected,
119    Abstract,
120    Readonly,
121    Get,
122    Set,
123
124    // Async
125    Async,
126    Await,
127    Promise,
128
129    // Types
130    Any,
131    Unknown,
132    Never,
133    Void,
134    Null,
135    Undefined,
136    Boolean,
137    Number,
138    String,
139    Object,
140    Array,
141    Tuple,
142    Union,
143    Intersection,
144    Literal,
145    Mapped,
146    Conditional,
147    Template,
148
149    // Utility types
150    Partial,
151    Required,
152    Pick,
153    Omit,
154    Record,
155    Exclude,
156    Extract,
157    NonNullable,
158    Parameters,
159    ReturnType,
160    InstanceType,
161    ThisParameterType,
162    OmitThisParameter,
163    ThisType,
164
165    // Other
166    True,
167    False,
168    In,
169    Of,
170    Instanceof,
171    Typeof,
172    Keyof,
173    Key,
174    Is,
175    Asserts,
176    Infer,
177    Declare,
178    Ambient,
179    Global,
180}
181
/// Lexer for TypeScript code.
///
/// Owns the source text together with a cursor into it and the line/column
/// of that cursor, which are used when reporting errors.
pub struct Lexer {
    /// The complete source text being tokenized.
    input: String,
    /// Index of the next unread character in `input`.
    position: usize,
    /// 1-based line number of the cursor.
    line: usize,
    /// 1-based column number of the cursor within the current line.
    column: usize,
}
189
190impl Lexer {
191    /// Create a new lexer
192    pub fn new(input: String) -> Self {
193        Self {
194            input,
195            position: 0,
196            line: 1,
197            column: 1,
198        }
199    }
200
201    /// Tokenize the input string
202    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
203        let mut tokens = Vec::new();
204
205        while self.position < self.input.len() {
206            match self.next_token()? {
207                Some(token) => {
208                    println!("Token: {:?}", token);
209                    tokens.push(token);
210                }
211                None => break,
212            }
213        }
214
215        tokens.push(Token::EOF);
216        Ok(tokens)
217    }
218
219    /// Get the next token
220    fn next_token(&mut self) -> Result<Option<Token>> {
221        self.skip_whitespace();
222
223        if self.position >= self.input.len() {
224            return Ok(None);
225        }
226
227        let ch = self.current_char();
228        let token = match ch {
229            '+' => {
230                if self.peek_char() == Some('=') {
231                    self.advance();
232                    Ok(Some(Token::PlusAssign))
233                } else if self.peek_char() == Some('+') {
234                    self.advance();
235                    Ok(Some(Token::Plus)) // ++ operator
236                } else {
237                    Ok(Some(Token::Plus))
238                }
239            }
240            '-' => {
241                if self.peek_char() == Some('=') {
242                    self.advance();
243                    Ok(Some(Token::MinusAssign))
244                } else if self.peek_char() == Some('>') {
245                    self.advance();
246                    Ok(Some(Token::Arrow))
247                } else {
248                    Ok(Some(Token::Minus))
249                }
250            }
251            '*' => {
252                if self.peek_char() == Some('=') {
253                    self.advance();
254                    Ok(Some(Token::MultiplyAssign))
255                } else {
256                    Ok(Some(Token::Multiply))
257                }
258            }
259            '/' => {
260                if self.peek_char() == Some('=') {
261                    self.advance();
262                    Ok(Some(Token::DivideAssign))
263                } else if self.peek_char() == Some('/') {
264                    self.advance();
265                    self.skip_line_comment();
266                    Ok(None)
267                } else if self.peek_char() == Some('*') {
268                    self.advance();
269                    self.skip_block_comment();
270                    Ok(None)
271                } else {
272                    Ok(Some(Token::Divide))
273                }
274            }
275            '%' => Ok(Some(Token::Modulo)),
276            '=' => {
277                if self.peek_char() == Some('=') {
278                    self.advance();
279                    if self.peek_char() == Some('=') {
280                        self.advance();
281                        Ok(Some(Token::StrictEqual))
282                    } else {
283                        Ok(Some(Token::Equal))
284                    }
285                } else if self.peek_char() == Some('>') {
286                    self.advance();
287                    Ok(Some(Token::Arrow))
288                } else {
289                    Ok(Some(Token::Assign))
290                }
291            }
292            '!' => {
293                if self.peek_char() == Some('=') {
294                    self.advance();
295                    if self.peek_char() == Some('=') {
296                        self.advance();
297                        Ok(Some(Token::StrictNotEqual))
298                    } else {
299                        Ok(Some(Token::NotEqual))
300                    }
301                } else {
302                    Ok(Some(Token::Not))
303                }
304            }
305            '<' => {
306                if self.peek_char() == Some('=') {
307                    self.advance();
308                    Ok(Some(Token::LessEqual))
309                } else {
310                    Ok(Some(Token::LessThan))
311                }
312            }
313            '>' => {
314                if self.peek_char() == Some('=') {
315                    self.advance();
316                    Ok(Some(Token::GreaterEqual))
317                } else {
318                    Ok(Some(Token::GreaterThan))
319                }
320            }
321            '&' => {
322                if self.peek_char() == Some('&') {
323                    self.advance();
324                    Ok(Some(Token::And))
325                } else {
326                    Ok(Some(Token::Intersection))
327                }
328            }
329            '|' => {
330                if self.peek_char() == Some('|') {
331                    self.advance();
332                    Ok(Some(Token::Or))
333                } else {
334                    Ok(Some(Token::Union))
335                }
336            }
337            '(' => Ok(Some(Token::LeftParen)),
338            ')' => Ok(Some(Token::RightParen)),
339            '{' => Ok(Some(Token::LeftBrace)),
340            '}' => Ok(Some(Token::RightBrace)),
341            '[' => Ok(Some(Token::LeftBracket)),
342            ']' => Ok(Some(Token::RightBracket)),
343            ';' => Ok(Some(Token::Semicolon)),
344            ',' => Ok(Some(Token::Comma)),
345            '.' => Ok(Some(Token::Dot)),
346            ':' => Ok(Some(Token::Colon)),
347            '?' => Ok(Some(Token::QuestionMark)),
348            '"' | '\'' => Ok(self.parse_string()?),
349            '`' => Ok(self.parse_template_literal()?),
350            '0'..='9' => Ok(self.parse_number()?),
351            'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
352            _ => {
353                return Err(CompilerError::parse_error(
354                    self.line,
355                    self.column,
356                    format!("Unexpected character: {}", ch),
357                ));
358            }
359        };
360
361        // Only advance for simple tokens that don't manage position themselves
362        match ch {
363            'a'..='z' | 'A'..='Z' | '_' | '$' => {
364                // parse_identifier_or_keyword manages position itself
365            }
366            '0'..='9' => {
367                // parse_number manages position itself
368            }
369            '"' | '\'' => {
370                // parse_string manages position itself
371            }
372            _ => {
373                // Simple tokens need to advance
374                self.advance();
375            }
376        }
377        token
378    }
379
380    /// Get current character
381    fn current_char(&self) -> char {
382        self.input.chars().nth(self.position).unwrap_or('\0')
383    }
384
385    /// Peek at next character
386    fn peek_char(&self) -> Option<char> {
387        self.input.chars().nth(self.position + 1)
388    }
389
390    /// Advance position
391    fn advance(&mut self) {
392        if self.current_char() == '\n' {
393            self.line += 1;
394            self.column = 1;
395        } else {
396            self.column += 1;
397        }
398        self.position += 1;
399    }
400
401    /// Skip whitespace
402    fn skip_whitespace(&mut self) {
403        while self.position < self.input.len() {
404            let ch = self.current_char();
405            if ch.is_whitespace() {
406                self.advance();
407            } else if ch == '/' && self.peek_char() == Some('/') {
408                // Skip line comment
409                self.advance(); // skip first /
410                self.advance(); // skip second /
411                while self.position < self.input.len() && self.current_char() != '\n' {
412                    self.advance();
413                }
414            } else if ch == '/' && self.peek_char() == Some('*') {
415                // Skip block comment
416                self.advance(); // skip /
417                self.advance(); // skip *
418                while self.position < self.input.len() {
419                    if self.current_char() == '*' && self.peek_char() == Some('/') {
420                        self.advance(); // skip *
421                        self.advance(); // skip /
422                        break;
423                    }
424                    self.advance();
425                }
426            } else {
427                break;
428            }
429        }
430    }
431
432    /// Skip line comment
433    fn skip_line_comment(&mut self) -> Option<Token> {
434        while self.position < self.input.len() && self.current_char() != '\n' {
435            self.advance();
436        }
437        None
438    }
439
440    /// Skip block comment
441    fn skip_block_comment(&mut self) -> Option<Token> {
442        while self.position < self.input.len() {
443            if self.current_char() == '*' && self.peek_char() == Some('/') {
444                self.advance();
445                self.advance();
446                break;
447            }
448            self.advance();
449        }
450        None
451    }
452
453    /// Parse string literal
454    fn parse_string(&mut self) -> Result<Option<Token>> {
455        let quote = self.current_char();
456        let mut value = String::new();
457        self.advance();
458
459        while self.position < self.input.len() {
460            let ch = self.current_char();
461            if ch == quote {
462                self.advance();
463                return Ok(Some(Token::String(value)));
464            } else if ch == '\\' {
465                self.advance();
466                if self.position < self.input.len() {
467                    let escaped = self.current_char();
468                    value.push(match escaped {
469                        'n' => '\n',
470                        't' => '\t',
471                        'r' => '\r',
472                        '\\' => '\\',
473                        '"' => '"',
474                        '\'' => '\'',
475                        _ => escaped,
476                    });
477                    self.advance();
478                }
479            } else {
480                value.push(ch);
481                self.advance();
482            }
483        }
484
485        Err(CompilerError::parse_error(
486            self.line,
487            self.column,
488            "Unterminated string literal",
489        ))
490    }
491
492    /// Parse template literal
493    fn parse_template_literal(&mut self) -> Result<Option<Token>> {
494        let mut value = String::new();
495        self.advance(); // consume opening backtick
496
497        while self.position < self.input.len() {
498            let ch = self.current_char();
499            if ch == '`' {
500                self.advance();
501                return Ok(Some(Token::TemplateLiteral(value)));
502            } else if ch == '\\' {
503                self.advance();
504                if self.position < self.input.len() {
505                    let escaped = self.current_char();
506                    value.push(match escaped {
507                        'n' => '\n',
508                        't' => '\t',
509                        'r' => '\r',
510                        '\\' => '\\',
511                        '`' => '`',
512                        '$' => '$',
513                        _ => escaped,
514                    });
515                    self.advance();
516                }
517            } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
518                // Handle ${} interpolation - include the full ${} in the string for now
519                value.push('$');
520                self.advance();
521                if self.position < self.input.len() {
522                    value.push('{');
523                    self.advance();
524                    // Skip to closing brace
525                    while self.position < self.input.len() && self.current_char() != '}' {
526                        value.push(self.current_char());
527                        self.advance();
528                    }
529                    if self.position < self.input.len() {
530                        value.push('}');
531                        self.advance();
532                    }
533                }
534            } else {
535                value.push(ch);
536                self.advance();
537            }
538        }
539
540        Err(CompilerError::parse_error(
541            self.line,
542            self.column,
543            "Unterminated template literal",
544        ))
545    }
546
547    /// Parse number literal
548    fn parse_number(&mut self) -> Result<Option<Token>> {
549        let mut value = String::new();
550        let mut has_dot = false;
551
552        while self.position < self.input.len() {
553            let ch = self.current_char();
554            if ch.is_ascii_digit() {
555                value.push(ch);
556                self.advance();
557            } else if ch == '.' && !has_dot {
558                has_dot = true;
559                value.push(ch);
560                self.advance();
561            } else {
562                break;
563            }
564        }
565
566        let number: f64 = value.parse().map_err(|_| {
567            CompilerError::parse_error(self.line, self.column, "Invalid number literal")
568        })?;
569
570        Ok(Some(Token::Number(number)))
571    }
572
573    /// Parse identifier or keyword
574    fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
575        let mut value = String::new();
576
577        while self.position < self.input.len() {
578            let ch = self.current_char();
579            if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
580                value.push(ch);
581                self.advance();
582            } else {
583                break;
584            }
585        }
586
587        // Check if it's a literal
588        if value == "true" {
589            Ok(Some(Token::Boolean(true)))
590        } else if value == "false" {
591            Ok(Some(Token::Boolean(false)))
592        } else if let Some(keyword) = self.parse_keyword(&value) {
593            Ok(Some(Token::Keyword(keyword)))
594        } else {
595            Ok(Some(Token::Identifier(value)))
596        }
597    }
598
599    /// Parse keyword from string
600    fn parse_keyword(&self, value: &str) -> Option<Keyword> {
601        match value {
602            "let" => Some(Keyword::Let),
603            "const" => Some(Keyword::Const),
604            "var" => Some(Keyword::Var),
605            "function" => Some(Keyword::Function),
606            "class" => Some(Keyword::Class),
607            "interface" => Some(Keyword::Interface),
608            "type" => Some(Keyword::Type),
609            "enum" => Some(Keyword::Enum),
610            "namespace" => Some(Keyword::Namespace),
611            "module" => Some(Keyword::Module),
612            "import" => Some(Keyword::Import),
613            "export" => Some(Keyword::Export),
614            "from" => Some(Keyword::From),
615            "as" => Some(Keyword::As),
616            "default" => Some(Keyword::Default),
617            "if" => Some(Keyword::If),
618            "else" => Some(Keyword::Else),
619            "switch" => Some(Keyword::Switch),
620            "case" => Some(Keyword::Case),
621            "for" => Some(Keyword::For),
622            "while" => Some(Keyword::While),
623            "do" => Some(Keyword::Do),
624            "break" => Some(Keyword::Break),
625            "continue" => Some(Keyword::Continue),
626            "return" => Some(Keyword::Return),
627            "throw" => Some(Keyword::Throw),
628            "try" => Some(Keyword::Try),
629            "catch" => Some(Keyword::Catch),
630            "finally" => Some(Keyword::Finally),
631            "extends" => Some(Keyword::Extends),
632            "implements" => Some(Keyword::Implements),
633            "super" => Some(Keyword::Super),
634            "this" => Some(Keyword::This),
635            "new" => Some(Keyword::New),
636            "static" => Some(Keyword::Static),
637            "public" => Some(Keyword::Public),
638            "private" => Some(Keyword::Private),
639            "protected" => Some(Keyword::Protected),
640            "abstract" => Some(Keyword::Abstract),
641            "readonly" => Some(Keyword::Readonly),
642            "get" => Some(Keyword::Get),
643            "set" => Some(Keyword::Set),
644            "async" => Some(Keyword::Async),
645            "await" => Some(Keyword::Await),
646            "Promise" => Some(Keyword::Promise),
647            "any" => Some(Keyword::Any),
648            "unknown" => Some(Keyword::Unknown),
649            "never" => Some(Keyword::Never),
650            "void" => Some(Keyword::Void),
651            "null" => Some(Keyword::Null),
652            "undefined" => Some(Keyword::Undefined),
653            "boolean" => Some(Keyword::Boolean),
654            "number" => Some(Keyword::Number),
655            "string" => Some(Keyword::String),
656            "object" => Some(Keyword::Object),
657            "Array" => Some(Keyword::Array),
658            "true" => Some(Keyword::True),
659            "false" => Some(Keyword::False),
660            "in" => Some(Keyword::In),
661            "of" => Some(Keyword::Of),
662            "instanceof" => Some(Keyword::Instanceof),
663            "typeof" => Some(Keyword::Typeof),
664            "keyof" => Some(Keyword::Keyof),
665            "key" => Some(Keyword::Key),
666            "is" => Some(Keyword::Is),
667            "asserts" => Some(Keyword::Asserts),
668            "infer" => Some(Keyword::Infer),
669            "declare" => Some(Keyword::Declare),
670            "global" => Some(Keyword::Global),
671            _ => None,
672        }
673    }
674}