TypeScript_Rust_Compiler/
lexer.rs

1//! Lexical analysis for TypeScript code
2
3use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6/// Token types for TypeScript
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9    // Literals
10    Number(f64),
11    String(String),
12    TemplateLiteral(String),
13    Boolean(bool),
14    Null,
15    Undefined,
16
17    // Identifiers and keywords
18    Identifier(String),
19    Keyword(Keyword),
20
21    // Operators
22    Plus,
23    Minus,
24    Multiply,
25    Divide,
26    Modulo,
27    Equal,
28    NotEqual,
29    StrictEqual,
30    StrictNotEqual,
31    LessThan,
32    GreaterThan,
33    LessEqual,
34    GreaterEqual,
35    And,
36    Or,
37    Not,
38    Assign,
39    Arrow, // =>
40    PlusAssign,
41    MinusAssign,
42    MultiplyAssign,
43    DivideAssign,
44    Union, // |
45    Intersection, // &
46
47    // Delimiters
48    LeftParen,
49    RightParen,
50    LeftBrace,
51    RightBrace,
52    LeftBracket,
53    RightBracket,
54    Semicolon,
55    Comma,
56    Dot,
57    Colon,
58    QuestionMark,
59
60    // Type annotations
61    TypeAnnotation,
62    GenericStart,
63    GenericEnd,
64
65    // Special
66    Newline,
67    Whitespace,
68    Comment(String),
69    EOF,
70}
71
72/// TypeScript keywords
73#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum Keyword {
75    // Declarations
76    Let,
77    Const,
78    Var,
79    Function,
80    Class,
81    Interface,
82    Type,
83    Enum,
84    Namespace,
85    Module,
86    Import,
87    Export,
88    From,
89    As,
90    Default,
91
92    // Control flow
93    If,
94    Else,
95    Switch,
96    Case,
97    DefaultCase,
98    For,
99    While,
100    Do,
101    Break,
102    Continue,
103    Return,
104    Throw,
105    Try,
106    Catch,
107    Finally,
108
109    // OOP
110    Extends,
111    Implements,
112    Super,
113    This,
114    New,
115    Static,
116    Public,
117    Private,
118    Protected,
119    Abstract,
120    Readonly,
121
122    // Async
123    Async,
124    Await,
125    Promise,
126
127    // Types
128    Any,
129    Unknown,
130    Never,
131    Void,
132    Null,
133    Undefined,
134    Boolean,
135    Number,
136    String,
137    Object,
138    Array,
139    Tuple,
140    Union,
141    Intersection,
142    Literal,
143    Mapped,
144    Conditional,
145    Template,
146
147    // Utility types
148    Partial,
149    Required,
150    Pick,
151    Omit,
152    Record,
153    Exclude,
154    Extract,
155    NonNullable,
156    Parameters,
157    ReturnType,
158    InstanceType,
159    ThisParameterType,
160    OmitThisParameter,
161    ThisType,
162
163    // Other
164    True,
165    False,
166    In,
167    Of,
168    Instanceof,
169    Typeof,
170    Keyof,
171    Key,
172    Is,
173    Asserts,
174    Infer,
175    Declare,
176    Ambient,
177    Global,
178}
179
/// Lexer for TypeScript code
///
/// Owns the source text together with a cursor and the line/column of that
/// cursor, which are used when reporting parse errors.
pub struct Lexer {
    /// Source text being tokenized.
    input: String,
    /// Cursor into `input` marking the next unread character.
    position: usize,
    /// Line of the cursor; starts at 1 and grows by one for every `\n` consumed.
    line: usize,
    /// Column of the cursor; starts at 1 and resets to 1 after every `\n` consumed.
    column: usize,
}
187
188impl Lexer {
189    /// Create a new lexer
190    pub fn new(input: String) -> Self {
191        Self {
192            input,
193            position: 0,
194            line: 1,
195            column: 1,
196        }
197    }
198
199    /// Tokenize the input string
200    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
201        let mut tokens = Vec::new();
202
203        while self.position < self.input.len() {
204            match self.next_token()? {
205                Some(token) => {
206                    println!("Token: {:?}", token);
207                    tokens.push(token);
208                }
209                None => break,
210            }
211        }
212
213        tokens.push(Token::EOF);
214        Ok(tokens)
215    }
216
217    /// Get the next token
218    fn next_token(&mut self) -> Result<Option<Token>> {
219        self.skip_whitespace();
220
221        if self.position >= self.input.len() {
222            return Ok(None);
223        }
224
225        let ch = self.current_char();
226        let token = match ch {
227            '+' => {
228                if self.peek_char() == Some('=') {
229                    self.advance();
230                    Ok(Some(Token::PlusAssign))
231                } else if self.peek_char() == Some('+') {
232                    self.advance();
233                    Ok(Some(Token::Plus)) // ++ operator
234                } else {
235                    Ok(Some(Token::Plus))
236                }
237            }
238            '-' => {
239                if self.peek_char() == Some('=') {
240                    self.advance();
241                    Ok(Some(Token::MinusAssign))
242                } else if self.peek_char() == Some('>') {
243                    self.advance();
244                    Ok(Some(Token::Arrow))
245                } else {
246                    Ok(Some(Token::Minus))
247                }
248            }
249            '*' => {
250                if self.peek_char() == Some('=') {
251                    self.advance();
252                    Ok(Some(Token::MultiplyAssign))
253                } else {
254                    Ok(Some(Token::Multiply))
255                }
256            }
257            '/' => {
258                if self.peek_char() == Some('=') {
259                    self.advance();
260                    Ok(Some(Token::DivideAssign))
261                } else if self.peek_char() == Some('/') {
262                    self.advance();
263                    self.skip_line_comment();
264                    Ok(None)
265                } else if self.peek_char() == Some('*') {
266                    self.advance();
267                    self.skip_block_comment();
268                    Ok(None)
269                } else {
270                    Ok(Some(Token::Divide))
271                }
272            }
273            '%' => Ok(Some(Token::Modulo)),
274            '=' => {
275                if self.peek_char() == Some('=') {
276                    self.advance();
277                    if self.peek_char() == Some('=') {
278                        self.advance();
279                        Ok(Some(Token::StrictEqual))
280                    } else {
281                        Ok(Some(Token::Equal))
282                    }
283                } else if self.peek_char() == Some('>') {
284                    self.advance();
285                    Ok(Some(Token::Arrow))
286                } else {
287                    Ok(Some(Token::Assign))
288                }
289            }
290            '!' => {
291                if self.peek_char() == Some('=') {
292                    self.advance();
293                    if self.peek_char() == Some('=') {
294                        self.advance();
295                        Ok(Some(Token::StrictNotEqual))
296                    } else {
297                        Ok(Some(Token::NotEqual))
298                    }
299                } else {
300                    Ok(Some(Token::Not))
301                }
302            }
303            '<' => {
304                if self.peek_char() == Some('=') {
305                    self.advance();
306                    Ok(Some(Token::LessEqual))
307                } else {
308                    Ok(Some(Token::LessThan))
309                }
310            }
311            '>' => {
312                if self.peek_char() == Some('=') {
313                    self.advance();
314                    Ok(Some(Token::GreaterEqual))
315                } else {
316                    Ok(Some(Token::GreaterThan))
317                }
318            }
319            '&' => {
320                if self.peek_char() == Some('&') {
321                    self.advance();
322                    Ok(Some(Token::And))
323                } else {
324                    Ok(Some(Token::Intersection))
325                }
326            }
327            '|' => {
328                if self.peek_char() == Some('|') {
329                    self.advance();
330                    Ok(Some(Token::Or))
331                } else {
332                    Ok(Some(Token::Union))
333                }
334            }
335            '(' => Ok(Some(Token::LeftParen)),
336            ')' => Ok(Some(Token::RightParen)),
337            '{' => Ok(Some(Token::LeftBrace)),
338            '}' => Ok(Some(Token::RightBrace)),
339            '[' => Ok(Some(Token::LeftBracket)),
340            ']' => Ok(Some(Token::RightBracket)),
341            ';' => Ok(Some(Token::Semicolon)),
342            ',' => Ok(Some(Token::Comma)),
343            '.' => Ok(Some(Token::Dot)),
344            ':' => Ok(Some(Token::Colon)),
345            '?' => Ok(Some(Token::QuestionMark)),
346            '"' | '\'' => Ok(self.parse_string()?),
347            '`' => Ok(self.parse_template_literal()?),
348            '0'..='9' => Ok(self.parse_number()?),
349            'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
350            _ => {
351                return Err(CompilerError::parse_error(
352                    self.line,
353                    self.column,
354                    format!("Unexpected character: {}", ch),
355                ));
356            }
357        };
358
359        // Only advance for simple tokens that don't manage position themselves
360        match ch {
361            'a'..='z' | 'A'..='Z' | '_' | '$' => {
362                // parse_identifier_or_keyword manages position itself
363            }
364            '0'..='9' => {
365                // parse_number manages position itself
366            }
367            '"' | '\'' => {
368                // parse_string manages position itself
369            }
370            _ => {
371                // Simple tokens need to advance
372                self.advance();
373            }
374        }
375        token
376    }
377
378    /// Get current character
379    fn current_char(&self) -> char {
380        self.input.chars().nth(self.position).unwrap_or('\0')
381    }
382
383    /// Peek at next character
384    fn peek_char(&self) -> Option<char> {
385        self.input.chars().nth(self.position + 1)
386    }
387
388    /// Advance position
389    fn advance(&mut self) {
390        if self.current_char() == '\n' {
391            self.line += 1;
392            self.column = 1;
393        } else {
394            self.column += 1;
395        }
396        self.position += 1;
397    }
398
399    /// Skip whitespace
400    fn skip_whitespace(&mut self) {
401        while self.position < self.input.len() {
402            let ch = self.current_char();
403            if ch.is_whitespace() {
404                self.advance();
405            } else if ch == '/' && self.peek_char() == Some('/') {
406                // Skip line comment
407                self.advance(); // skip first /
408                self.advance(); // skip second /
409                while self.position < self.input.len() && self.current_char() != '\n' {
410                    self.advance();
411                }
412            } else if ch == '/' && self.peek_char() == Some('*') {
413                // Skip block comment
414                self.advance(); // skip /
415                self.advance(); // skip *
416                while self.position < self.input.len() {
417                    if self.current_char() == '*' && self.peek_char() == Some('/') {
418                        self.advance(); // skip *
419                        self.advance(); // skip /
420                        break;
421                    }
422                    self.advance();
423                }
424            } else {
425                break;
426            }
427        }
428    }
429
430    /// Skip line comment
431    fn skip_line_comment(&mut self) -> Option<Token> {
432        while self.position < self.input.len() && self.current_char() != '\n' {
433            self.advance();
434        }
435        None
436    }
437
438    /// Skip block comment
439    fn skip_block_comment(&mut self) -> Option<Token> {
440        while self.position < self.input.len() {
441            if self.current_char() == '*' && self.peek_char() == Some('/') {
442                self.advance();
443                self.advance();
444                break;
445            }
446            self.advance();
447        }
448        None
449    }
450
451    /// Parse string literal
452    fn parse_string(&mut self) -> Result<Option<Token>> {
453        let quote = self.current_char();
454        let mut value = String::new();
455        self.advance();
456
457        while self.position < self.input.len() {
458            let ch = self.current_char();
459            if ch == quote {
460                self.advance();
461                return Ok(Some(Token::String(value)));
462            } else if ch == '\\' {
463                self.advance();
464                if self.position < self.input.len() {
465                    let escaped = self.current_char();
466                    value.push(match escaped {
467                        'n' => '\n',
468                        't' => '\t',
469                        'r' => '\r',
470                        '\\' => '\\',
471                        '"' => '"',
472                        '\'' => '\'',
473                        _ => escaped,
474                    });
475                    self.advance();
476                }
477            } else {
478                value.push(ch);
479                self.advance();
480            }
481        }
482
483        Err(CompilerError::parse_error(
484            self.line,
485            self.column,
486            "Unterminated string literal",
487        ))
488    }
489
490    /// Parse template literal
491    fn parse_template_literal(&mut self) -> Result<Option<Token>> {
492        let mut value = String::new();
493        self.advance(); // consume opening backtick
494
495        while self.position < self.input.len() {
496            let ch = self.current_char();
497            if ch == '`' {
498                self.advance();
499                return Ok(Some(Token::TemplateLiteral(value)));
500            } else if ch == '\\' {
501                self.advance();
502                if self.position < self.input.len() {
503                    let escaped = self.current_char();
504                    value.push(match escaped {
505                        'n' => '\n',
506                        't' => '\t',
507                        'r' => '\r',
508                        '\\' => '\\',
509                        '`' => '`',
510                        '$' => '$',
511                        _ => escaped,
512                    });
513                    self.advance();
514                }
515            } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
516                // Handle ${} interpolation - include the full ${} in the string for now
517                value.push('$');
518                self.advance();
519                if self.position < self.input.len() {
520                    value.push('{');
521                    self.advance();
522                    // Skip to closing brace
523                    while self.position < self.input.len() && self.current_char() != '}' {
524                        value.push(self.current_char());
525                        self.advance();
526                    }
527                    if self.position < self.input.len() {
528                        value.push('}');
529                        self.advance();
530                    }
531                }
532            } else {
533                value.push(ch);
534                self.advance();
535            }
536        }
537
538        Err(CompilerError::parse_error(
539            self.line,
540            self.column,
541            "Unterminated template literal",
542        ))
543    }
544
545    /// Parse number literal
546    fn parse_number(&mut self) -> Result<Option<Token>> {
547        let mut value = String::new();
548        let mut has_dot = false;
549
550        while self.position < self.input.len() {
551            let ch = self.current_char();
552            if ch.is_ascii_digit() {
553                value.push(ch);
554                self.advance();
555            } else if ch == '.' && !has_dot {
556                has_dot = true;
557                value.push(ch);
558                self.advance();
559            } else {
560                break;
561            }
562        }
563
564        let number: f64 = value.parse().map_err(|_| {
565            CompilerError::parse_error(self.line, self.column, "Invalid number literal")
566        })?;
567
568        Ok(Some(Token::Number(number)))
569    }
570
571    /// Parse identifier or keyword
572    fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
573        let mut value = String::new();
574
575        while self.position < self.input.len() {
576            let ch = self.current_char();
577            if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
578                value.push(ch);
579                self.advance();
580            } else {
581                break;
582            }
583        }
584
585        // Check if it's a literal
586        if value == "true" {
587            Ok(Some(Token::Boolean(true)))
588        } else if value == "false" {
589            Ok(Some(Token::Boolean(false)))
590        } else if let Some(keyword) = self.parse_keyword(&value) {
591            Ok(Some(Token::Keyword(keyword)))
592        } else {
593            Ok(Some(Token::Identifier(value)))
594        }
595    }
596
597    /// Parse keyword from string
598    fn parse_keyword(&self, value: &str) -> Option<Keyword> {
599        match value {
600            "let" => Some(Keyword::Let),
601            "const" => Some(Keyword::Const),
602            "var" => Some(Keyword::Var),
603            "function" => Some(Keyword::Function),
604            "class" => Some(Keyword::Class),
605            "interface" => Some(Keyword::Interface),
606            "type" => Some(Keyword::Type),
607            "enum" => Some(Keyword::Enum),
608            "namespace" => Some(Keyword::Namespace),
609            "module" => Some(Keyword::Module),
610            "import" => Some(Keyword::Import),
611            "export" => Some(Keyword::Export),
612            "from" => Some(Keyword::From),
613            "as" => Some(Keyword::As),
614            "default" => Some(Keyword::Default),
615            "if" => Some(Keyword::If),
616            "else" => Some(Keyword::Else),
617            "switch" => Some(Keyword::Switch),
618            "case" => Some(Keyword::Case),
619            "for" => Some(Keyword::For),
620            "while" => Some(Keyword::While),
621            "do" => Some(Keyword::Do),
622            "break" => Some(Keyword::Break),
623            "continue" => Some(Keyword::Continue),
624            "return" => Some(Keyword::Return),
625            "throw" => Some(Keyword::Throw),
626            "try" => Some(Keyword::Try),
627            "catch" => Some(Keyword::Catch),
628            "finally" => Some(Keyword::Finally),
629            "extends" => Some(Keyword::Extends),
630            "implements" => Some(Keyword::Implements),
631            "super" => Some(Keyword::Super),
632            "this" => Some(Keyword::This),
633            "new" => Some(Keyword::New),
634            "static" => Some(Keyword::Static),
635            "public" => Some(Keyword::Public),
636            "private" => Some(Keyword::Private),
637            "protected" => Some(Keyword::Protected),
638            "abstract" => Some(Keyword::Abstract),
639            "readonly" => Some(Keyword::Readonly),
640            "async" => Some(Keyword::Async),
641            "await" => Some(Keyword::Await),
642            "Promise" => Some(Keyword::Promise),
643            "any" => Some(Keyword::Any),
644            "unknown" => Some(Keyword::Unknown),
645            "never" => Some(Keyword::Never),
646            "void" => Some(Keyword::Void),
647            "null" => Some(Keyword::Null),
648            "undefined" => Some(Keyword::Undefined),
649            "boolean" => Some(Keyword::Boolean),
650            "number" => Some(Keyword::Number),
651            "string" => Some(Keyword::String),
652            "object" => Some(Keyword::Object),
653            "Array" => Some(Keyword::Array),
654            "true" => Some(Keyword::True),
655            "false" => Some(Keyword::False),
656            "in" => Some(Keyword::In),
657            "of" => Some(Keyword::Of),
658            "instanceof" => Some(Keyword::Instanceof),
659            "typeof" => Some(Keyword::Typeof),
660            "keyof" => Some(Keyword::Keyof),
661            "key" => Some(Keyword::Key),
662            "is" => Some(Keyword::Is),
663            "asserts" => Some(Keyword::Asserts),
664            "infer" => Some(Keyword::Infer),
665            "declare" => Some(Keyword::Declare),
666            "global" => Some(Keyword::Global),
667            _ => None,
668        }
669    }
670}