TypeScript_Rust_Compiler/
lexer.rs

//! Lexical analysis (tokenization) for TypeScript code
2
3use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6/// Token types for TypeScript
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9    // Literals
10    Number(f64),
11    String(String),
12    TemplateLiteral(String),
13    Boolean(bool),
14    Null,
15    Undefined,
16
17    // Identifiers and keywords
18    Identifier(String),
19    Keyword(Keyword),
20
21    // Operators
22    Plus,
23    Minus,
24    Multiply,
25    Divide,
26    Modulo,
27    Equal,
28    NotEqual,
29    StrictEqual,
30    StrictNotEqual,
31    LessThan,
32    GreaterThan,
33    LessEqual,
34    GreaterEqual,
35    And,
36    Or,
37    Not,
38    Assign,
39    Arrow, // =>
40    PlusAssign,
41    MinusAssign,
42    MultiplyAssign,
43    DivideAssign,
44    Union, // |
45    Intersection, // &
46
47    // Delimiters
48    LeftParen,
49    RightParen,
50    LeftBrace,
51    RightBrace,
52    LeftBracket,
53    RightBracket,
54    Semicolon,
55    Comma,
56    Dot,
57    Colon,
58    QuestionMark,
59
60    // Type annotations
61    TypeAnnotation,
62    GenericStart,
63    GenericEnd,
64
65    // Special
66    Newline,
67    Whitespace,
68    Comment(String),
69    EOF,
70}
71
72/// TypeScript keywords
73#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
74pub enum Keyword {
75    // Declarations
76    Let,
77    Const,
78    Var,
79    Function,
80    Class,
81    Interface,
82    Type,
83    Enum,
84    Namespace,
85    Module,
86    Import,
87    Export,
88    From,
89    As,
90    Default,
91
92    // Control flow
93    If,
94    Else,
95    Switch,
96    Case,
97    DefaultCase,
98    For,
99    While,
100    Do,
101    Break,
102    Continue,
103    Return,
104    Throw,
105    Try,
106    Catch,
107    Finally,
108
109    // OOP
110    Extends,
111    Implements,
112    Super,
113    This,
114    New,
115    Static,
116    Public,
117    Private,
118    Protected,
119    Abstract,
120    Readonly,
121    Get,
122    Set,
123
124    // Async
125    Async,
126    Await,
127    Promise,
128
129    // Types
130    Any,
131    Unknown,
132    Never,
133    Void,
134    Null,
135    Undefined,
136    Boolean,
137    Number,
138    String,
139    Object,
140    Array,
141    Tuple,
142    Union,
143    Intersection,
144    Literal,
145    Mapped,
146    Conditional,
147    Template,
148
149    // Utility types
150    Partial,
151    Required,
152    Pick,
153    Omit,
154    Record,
155    Exclude,
156    Extract,
157    NonNullable,
158    Parameters,
159    ReturnType,
160    InstanceType,
161    ThisParameterType,
162    OmitThisParameter,
163    ThisType,
164
165    // Other
166    True,
167    False,
168    In,
169    Of,
170    Instanceof,
171    Typeof,
172    Keyof,
173    Key,
174    Is,
175    Asserts,
176    Infer,
177    Declare,
178    Ambient,
179    Global,
180}
181
/// Lexer for TypeScript code.
///
/// Owns the source text and a cursor into it, together with the 1-based
/// line/column of that cursor for error reporting.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The complete source text being tokenized.
    input: String,
    /// Offset of the next unread character in `input`; starts at 0.
    position: usize,
    /// 1-based line number of the cursor.
    line: usize,
    /// 1-based column number of the cursor; reset to 1 after each newline.
    column: usize,
}
189
190impl Lexer {
191    /// Create a new lexer
192    pub fn new(input: String) -> Self {
193        Self {
194            input,
195            position: 0,
196            line: 1,
197            column: 1,
198        }
199    }
200
201    /// Tokenize the input string
202    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
203        let mut tokens = Vec::new();
204
205        while self.position < self.input.len() {
206            match self.next_token()? {
207                Some(token) => {
208                    tokens.push(token);
209                }
210                None => break,
211            }
212        }
213
214        tokens.push(Token::EOF);
215        Ok(tokens)
216    }
217
218    /// Get the next token
219    fn next_token(&mut self) -> Result<Option<Token>> {
220        self.skip_whitespace();
221
222        if self.position >= self.input.len() {
223            return Ok(None);
224        }
225
226        let ch = self.current_char();
227        let token = match ch {
228            '+' => {
229                if self.peek_char() == Some('=') {
230                    self.advance();
231                    Ok(Some(Token::PlusAssign))
232                } else if self.peek_char() == Some('+') {
233                    self.advance();
234                    Ok(Some(Token::Plus)) // ++ operator
235                } else {
236                    Ok(Some(Token::Plus))
237                }
238            }
239            '-' => {
240                if self.peek_char() == Some('=') {
241                    self.advance();
242                    Ok(Some(Token::MinusAssign))
243                } else if self.peek_char() == Some('>') {
244                    self.advance();
245                    Ok(Some(Token::Arrow))
246                } else {
247                    Ok(Some(Token::Minus))
248                }
249            }
250            '*' => {
251                if self.peek_char() == Some('=') {
252                    self.advance();
253                    Ok(Some(Token::MultiplyAssign))
254                } else {
255                    Ok(Some(Token::Multiply))
256                }
257            }
258            '/' => {
259                if self.peek_char() == Some('=') {
260                    self.advance();
261                    Ok(Some(Token::DivideAssign))
262                } else if self.peek_char() == Some('/') {
263                    self.advance();
264                    self.skip_line_comment();
265                    Ok(None)
266                } else if self.peek_char() == Some('*') {
267                    self.advance();
268                    self.skip_block_comment();
269                    Ok(None)
270                } else {
271                    Ok(Some(Token::Divide))
272                }
273            }
274            '%' => Ok(Some(Token::Modulo)),
275            '=' => {
276                if self.peek_char() == Some('=') {
277                    self.advance();
278                    if self.peek_char() == Some('=') {
279                        self.advance();
280                        Ok(Some(Token::StrictEqual))
281                    } else {
282                        Ok(Some(Token::Equal))
283                    }
284                } else if self.peek_char() == Some('>') {
285                    self.advance();
286                    Ok(Some(Token::Arrow))
287                } else {
288                    Ok(Some(Token::Assign))
289                }
290            }
291            '!' => {
292                if self.peek_char() == Some('=') {
293                    self.advance();
294                    if self.peek_char() == Some('=') {
295                        self.advance();
296                        Ok(Some(Token::StrictNotEqual))
297                    } else {
298                        Ok(Some(Token::NotEqual))
299                    }
300                } else {
301                    Ok(Some(Token::Not))
302                }
303            }
304            '<' => {
305                if self.peek_char() == Some('=') {
306                    self.advance();
307                    Ok(Some(Token::LessEqual))
308                } else {
309                    Ok(Some(Token::LessThan))
310                }
311            }
312            '>' => {
313                if self.peek_char() == Some('=') {
314                    self.advance();
315                    Ok(Some(Token::GreaterEqual))
316                } else {
317                    Ok(Some(Token::GreaterThan))
318                }
319            }
320            '&' => {
321                if self.peek_char() == Some('&') {
322                    self.advance();
323                    Ok(Some(Token::And))
324                } else {
325                    Ok(Some(Token::Intersection))
326                }
327            }
328            '|' => {
329                if self.peek_char() == Some('|') {
330                    self.advance();
331                    Ok(Some(Token::Or))
332                } else {
333                    Ok(Some(Token::Union))
334                }
335            }
336            '(' => Ok(Some(Token::LeftParen)),
337            ')' => Ok(Some(Token::RightParen)),
338            '{' => Ok(Some(Token::LeftBrace)),
339            '}' => Ok(Some(Token::RightBrace)),
340            '[' => Ok(Some(Token::LeftBracket)),
341            ']' => Ok(Some(Token::RightBracket)),
342            ';' => Ok(Some(Token::Semicolon)),
343            ',' => Ok(Some(Token::Comma)),
344            '.' => Ok(Some(Token::Dot)),
345            ':' => Ok(Some(Token::Colon)),
346            '?' => Ok(Some(Token::QuestionMark)),
347            '"' | '\'' => Ok(self.parse_string()?),
348            '`' => Ok(self.parse_template_literal()?),
349            '0'..='9' => Ok(self.parse_number()?),
350            'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
351            _ => {
352                return Err(CompilerError::parse_error(
353                    self.line,
354                    self.column,
355                    format!("Unexpected character: {}", ch),
356                ));
357            }
358        };
359
360        // Only advance for simple tokens that don't manage position themselves
361        match ch {
362            'a'..='z' | 'A'..='Z' | '_' | '$' => {
363                // parse_identifier_or_keyword manages position itself
364            }
365            '0'..='9' => {
366                // parse_number manages position itself
367            }
368            '"' | '\'' => {
369                // parse_string manages position itself
370            }
371            _ => {
372                // Simple tokens need to advance
373                self.advance();
374            }
375        }
376        token
377    }
378
379    /// Get current character
380    fn current_char(&self) -> char {
381        self.input.chars().nth(self.position).unwrap_or('\0')
382    }
383
384    /// Peek at next character
385    fn peek_char(&self) -> Option<char> {
386        self.input.chars().nth(self.position + 1)
387    }
388
389    /// Advance position
390    fn advance(&mut self) {
391        if self.current_char() == '\n' {
392            self.line += 1;
393            self.column = 1;
394        } else {
395            self.column += 1;
396        }
397        self.position += 1;
398    }
399
400    /// Skip whitespace
401    fn skip_whitespace(&mut self) {
402        while self.position < self.input.len() {
403            let ch = self.current_char();
404            if ch.is_whitespace() {
405                self.advance();
406            } else if ch == '/' && self.peek_char() == Some('/') {
407                // Skip line comment
408                self.advance(); // skip first /
409                self.advance(); // skip second /
410                while self.position < self.input.len() && self.current_char() != '\n' {
411                    self.advance();
412                }
413            } else if ch == '/' && self.peek_char() == Some('*') {
414                // Skip block comment
415                self.advance(); // skip /
416                self.advance(); // skip *
417                while self.position < self.input.len() {
418                    if self.current_char() == '*' && self.peek_char() == Some('/') {
419                        self.advance(); // skip *
420                        self.advance(); // skip /
421                        break;
422                    }
423                    self.advance();
424                }
425            } else {
426                break;
427            }
428        }
429    }
430
431    /// Skip line comment
432    fn skip_line_comment(&mut self) -> Option<Token> {
433        while self.position < self.input.len() && self.current_char() != '\n' {
434            self.advance();
435        }
436        None
437    }
438
439    /// Skip block comment
440    fn skip_block_comment(&mut self) -> Option<Token> {
441        while self.position < self.input.len() {
442            if self.current_char() == '*' && self.peek_char() == Some('/') {
443                self.advance();
444                self.advance();
445                break;
446            }
447            self.advance();
448        }
449        None
450    }
451
452    /// Parse string literal
453    fn parse_string(&mut self) -> Result<Option<Token>> {
454        let quote = self.current_char();
455        let mut value = String::new();
456        self.advance();
457
458        while self.position < self.input.len() {
459            let ch = self.current_char();
460            if ch == quote {
461                self.advance();
462                return Ok(Some(Token::String(value)));
463            } else if ch == '\\' {
464                self.advance();
465                if self.position < self.input.len() {
466                    let escaped = self.current_char();
467                    value.push(match escaped {
468                        'n' => '\n',
469                        't' => '\t',
470                        'r' => '\r',
471                        '\\' => '\\',
472                        '"' => '"',
473                        '\'' => '\'',
474                        _ => escaped,
475                    });
476                    self.advance();
477                }
478            } else {
479                value.push(ch);
480                self.advance();
481            }
482        }
483
484        Err(CompilerError::parse_error(
485            self.line,
486            self.column,
487            "Unterminated string literal",
488        ))
489    }
490
491    /// Parse template literal
492    fn parse_template_literal(&mut self) -> Result<Option<Token>> {
493        let mut value = String::new();
494        self.advance(); // consume opening backtick
495
496        while self.position < self.input.len() {
497            let ch = self.current_char();
498            if ch == '`' {
499                self.advance();
500                return Ok(Some(Token::TemplateLiteral(value)));
501            } else if ch == '\\' {
502                self.advance();
503                if self.position < self.input.len() {
504                    let escaped = self.current_char();
505                    value.push(match escaped {
506                        'n' => '\n',
507                        't' => '\t',
508                        'r' => '\r',
509                        '\\' => '\\',
510                        '`' => '`',
511                        '$' => '$',
512                        _ => escaped,
513                    });
514                    self.advance();
515                }
516            } else if ch == '$' && self.position + 1 < self.input.len() && self.input.chars().nth(self.position + 1) == Some('{') {
517                // Handle ${} interpolation - include the full ${} in the string for now
518                value.push('$');
519                self.advance();
520                if self.position < self.input.len() {
521                    value.push('{');
522                    self.advance();
523                    // Skip to closing brace
524                    while self.position < self.input.len() && self.current_char() != '}' {
525                        value.push(self.current_char());
526                        self.advance();
527                    }
528                    if self.position < self.input.len() {
529                        value.push('}');
530                        self.advance();
531                    }
532                }
533            } else {
534                value.push(ch);
535                self.advance();
536            }
537        }
538
539        Err(CompilerError::parse_error(
540            self.line,
541            self.column,
542            "Unterminated template literal",
543        ))
544    }
545
546    /// Parse number literal
547    fn parse_number(&mut self) -> Result<Option<Token>> {
548        let mut value = String::new();
549        let mut has_dot = false;
550
551        while self.position < self.input.len() {
552            let ch = self.current_char();
553            if ch.is_ascii_digit() {
554                value.push(ch);
555                self.advance();
556            } else if ch == '.' && !has_dot {
557                has_dot = true;
558                value.push(ch);
559                self.advance();
560            } else {
561                break;
562            }
563        }
564
565        let number: f64 = value.parse().map_err(|_| {
566            CompilerError::parse_error(self.line, self.column, "Invalid number literal")
567        })?;
568
569        Ok(Some(Token::Number(number)))
570    }
571
572    /// Parse identifier or keyword
573    fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
574        let mut value = String::new();
575
576        while self.position < self.input.len() {
577            let ch = self.current_char();
578            if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
579                value.push(ch);
580                self.advance();
581            } else {
582                break;
583            }
584        }
585
586        // Check if it's a literal
587        if value == "true" {
588            Ok(Some(Token::Boolean(true)))
589        } else if value == "false" {
590            Ok(Some(Token::Boolean(false)))
591        } else if let Some(keyword) = self.parse_keyword(&value) {
592            Ok(Some(Token::Keyword(keyword)))
593        } else {
594            Ok(Some(Token::Identifier(value)))
595        }
596    }
597
598    /// Parse keyword from string
599    fn parse_keyword(&self, value: &str) -> Option<Keyword> {
600        match value {
601            "let" => Some(Keyword::Let),
602            "const" => Some(Keyword::Const),
603            "var" => Some(Keyword::Var),
604            "function" => Some(Keyword::Function),
605            "class" => Some(Keyword::Class),
606            "interface" => Some(Keyword::Interface),
607            "type" => Some(Keyword::Type),
608            "enum" => Some(Keyword::Enum),
609            "namespace" => Some(Keyword::Namespace),
610            "module" => Some(Keyword::Module),
611            "import" => Some(Keyword::Import),
612            "export" => Some(Keyword::Export),
613            "from" => Some(Keyword::From),
614            "as" => Some(Keyword::As),
615            "default" => Some(Keyword::Default),
616            "if" => Some(Keyword::If),
617            "else" => Some(Keyword::Else),
618            "switch" => Some(Keyword::Switch),
619            "case" => Some(Keyword::Case),
620            "for" => Some(Keyword::For),
621            "while" => Some(Keyword::While),
622            "do" => Some(Keyword::Do),
623            "break" => Some(Keyword::Break),
624            "continue" => Some(Keyword::Continue),
625            "return" => Some(Keyword::Return),
626            "throw" => Some(Keyword::Throw),
627            "try" => Some(Keyword::Try),
628            "catch" => Some(Keyword::Catch),
629            "finally" => Some(Keyword::Finally),
630            "extends" => Some(Keyword::Extends),
631            "implements" => Some(Keyword::Implements),
632            "super" => Some(Keyword::Super),
633            "this" => Some(Keyword::This),
634            "new" => Some(Keyword::New),
635            "static" => Some(Keyword::Static),
636            "public" => Some(Keyword::Public),
637            "private" => Some(Keyword::Private),
638            "protected" => Some(Keyword::Protected),
639            "abstract" => Some(Keyword::Abstract),
640            "readonly" => Some(Keyword::Readonly),
641            "get" => Some(Keyword::Get),
642            "set" => Some(Keyword::Set),
643            "async" => Some(Keyword::Async),
644            "await" => Some(Keyword::Await),
645            "Promise" => Some(Keyword::Promise),
646            "any" => Some(Keyword::Any),
647            "unknown" => Some(Keyword::Unknown),
648            "never" => Some(Keyword::Never),
649            "void" => Some(Keyword::Void),
650            "null" => Some(Keyword::Null),
651            "undefined" => Some(Keyword::Undefined),
652            "boolean" => Some(Keyword::Boolean),
653            "number" => Some(Keyword::Number),
654            "string" => Some(Keyword::String),
655            "object" => Some(Keyword::Object),
656            "Array" => Some(Keyword::Array),
657            "true" => Some(Keyword::True),
658            "false" => Some(Keyword::False),
659            "in" => Some(Keyword::In),
660            "of" => Some(Keyword::Of),
661            "instanceof" => Some(Keyword::Instanceof),
662            "typeof" => Some(Keyword::Typeof),
663            "keyof" => Some(Keyword::Keyof),
664            "key" => Some(Keyword::Key),
665            "is" => Some(Keyword::Is),
666            "asserts" => Some(Keyword::Asserts),
667            "infer" => Some(Keyword::Infer),
668            "declare" => Some(Keyword::Declare),
669            "global" => Some(Keyword::Global),
670            _ => None,
671        }
672    }
673}