// TypeScript_Rust_Compiler/lexer.rs

//! Lexical analysis for TypeScript code
2
3use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
6/// Token types for TypeScript
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub enum Token {
9    // Literals
10    Number(f64),
11    String(String),
12    TemplateLiteral(String),
13    Boolean(bool),
14    Null,
15    Undefined,
16
17    // Identifiers and keywords
18    Identifier(String),
19    Keyword(Keyword),
20
21    // Operators
22    Plus,
23    Minus,
24    Multiply,
25    Divide,
26    Modulo,
27    Equal,
28    NotEqual,
29    StrictEqual,
30    StrictNotEqual,
31    LessThan,
32    GreaterThan,
33    LessEqual,
34    GreaterEqual,
35    And,
36    Or,
37    Not,
38    Assign,
39    PlusAssign,
40    MinusAssign,
41    MultiplyAssign,
42    DivideAssign,
43
44    // Delimiters
45    LeftParen,
46    RightParen,
47    LeftBrace,
48    RightBrace,
49    LeftBracket,
50    RightBracket,
51    Semicolon,
52    Comma,
53    Dot,
54    Colon,
55    QuestionMark,
56    Arrow,
57
58    // Type annotations
59    TypeAnnotation,
60    GenericStart,
61    GenericEnd,
62
63    // Special
64    Newline,
65    Whitespace,
66    Comment(String),
67    EOF,
68}
69
70/// TypeScript keywords
71#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
72pub enum Keyword {
73    // Declarations
74    Let,
75    Const,
76    Var,
77    Function,
78    Class,
79    Interface,
80    Type,
81    Enum,
82    Namespace,
83    Module,
84    Import,
85    Export,
86    From,
87    As,
88    Default,
89
90    // Control flow
91    If,
92    Else,
93    Switch,
94    Case,
95    DefaultCase,
96    For,
97    While,
98    Do,
99    Break,
100    Continue,
101    Return,
102    Throw,
103    Try,
104    Catch,
105    Finally,
106
107    // OOP
108    Extends,
109    Implements,
110    Super,
111    This,
112    New,
113    Static,
114    Public,
115    Private,
116    Protected,
117    Abstract,
118    Readonly,
119
120    // Async
121    Async,
122    Await,
123    Promise,
124
125    // Types
126    Any,
127    Unknown,
128    Never,
129    Void,
130    Null,
131    Undefined,
132    Boolean,
133    Number,
134    String,
135    Object,
136    Array,
137    Tuple,
138    Union,
139    Intersection,
140    Literal,
141    Mapped,
142    Conditional,
143    Template,
144
145    // Utility types
146    Partial,
147    Required,
148    Pick,
149    Omit,
150    Record,
151    Exclude,
152    Extract,
153    NonNullable,
154    Parameters,
155    ReturnType,
156    InstanceType,
157    ThisParameterType,
158    OmitThisParameter,
159    ThisType,
160
161    // Other
162    True,
163    False,
164    In,
165    Of,
166    Instanceof,
167    Typeof,
168    Keyof,
169    Is,
170    Asserts,
171    Infer,
172    Declare,
173    Ambient,
174    Global,
175}
176
/// Lexer for TypeScript code.
///
/// Owns the source text and a cursor into it, and tracks the cursor's line
/// and column for use in error reports.
pub struct Lexer {
    /// Source text being tokenized.
    input: String,
    /// Current read offset into `input`; starts at 0.
    position: usize,
    /// Line of the cursor; starts at 1.
    line: usize,
    /// Column of the cursor; starts at 1.
    column: usize,
}
184
185impl Lexer {
186    /// Create a new lexer
187    pub fn new(input: String) -> Self {
188        Self {
189            input,
190            position: 0,
191            line: 1,
192            column: 1,
193        }
194    }
195
196    /// Tokenize the input string
197    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
198        let mut tokens = Vec::new();
199
200        while self.position < self.input.len() {
201            match self.next_token()? {
202                Some(token) => {
203                    println!("Token: {:?}", token);
204                    tokens.push(token);
205                }
206                None => break,
207            }
208        }
209
210        tokens.push(Token::EOF);
211        Ok(tokens)
212    }
213
214    /// Get the next token
215    fn next_token(&mut self) -> Result<Option<Token>> {
216        self.skip_whitespace();
217
218        if self.position >= self.input.len() {
219            return Ok(None);
220        }
221
222        let ch = self.current_char();
223        let token = match ch {
224            '+' => {
225                if self.peek_char() == Some('=') {
226                    self.advance();
227                    Ok(Some(Token::PlusAssign))
228                } else if self.peek_char() == Some('+') {
229                    self.advance();
230                    Ok(Some(Token::Plus)) // ++ operator
231                } else {
232                    Ok(Some(Token::Plus))
233                }
234            }
235            '-' => {
236                if self.peek_char() == Some('=') {
237                    self.advance();
238                    Ok(Some(Token::MinusAssign))
239                } else if self.peek_char() == Some('>') {
240                    self.advance();
241                    Ok(Some(Token::Arrow))
242                } else {
243                    Ok(Some(Token::Minus))
244                }
245            }
246            '*' => {
247                if self.peek_char() == Some('=') {
248                    self.advance();
249                    Ok(Some(Token::MultiplyAssign))
250                } else {
251                    Ok(Some(Token::Multiply))
252                }
253            }
254            '/' => {
255                if self.peek_char() == Some('=') {
256                    self.advance();
257                    Ok(Some(Token::DivideAssign))
258                } else if self.peek_char() == Some('/') {
259                    self.advance();
260                    self.skip_line_comment();
261                    Ok(None)
262                } else if self.peek_char() == Some('*') {
263                    self.advance();
264                    self.skip_block_comment();
265                    Ok(None)
266                } else {
267                    Ok(Some(Token::Divide))
268                }
269            }
270            '%' => Ok(Some(Token::Modulo)),
271            '=' => {
272                if self.peek_char() == Some('=') {
273                    self.advance();
274                    if self.peek_char() == Some('=') {
275                        self.advance();
276                        Ok(Some(Token::StrictEqual))
277                    } else {
278                        Ok(Some(Token::Equal))
279                    }
280                } else {
281                    Ok(Some(Token::Assign))
282                }
283            }
284            '!' => {
285                if self.peek_char() == Some('=') {
286                    self.advance();
287                    if self.peek_char() == Some('=') {
288                        self.advance();
289                        Ok(Some(Token::StrictNotEqual))
290                    } else {
291                        Ok(Some(Token::NotEqual))
292                    }
293                } else {
294                    Ok(Some(Token::Not))
295                }
296            }
297            '<' => {
298                if self.peek_char() == Some('=') {
299                    self.advance();
300                    Ok(Some(Token::LessEqual))
301                } else {
302                    Ok(Some(Token::LessThan))
303                }
304            }
305            '>' => {
306                if self.peek_char() == Some('=') {
307                    self.advance();
308                    Ok(Some(Token::GreaterEqual))
309                } else {
310                    Ok(Some(Token::GreaterThan))
311                }
312            }
313            '&' => {
314                if self.peek_char() == Some('&') {
315                    self.advance();
316                    Ok(Some(Token::And))
317                } else {
318                    return Err(CompilerError::parse_error(
319                        self.line,
320                        self.column,
321                        "Unexpected character: &",
322                    ));
323                }
324            }
325            '|' => {
326                if self.peek_char() == Some('|') {
327                    self.advance();
328                    Ok(Some(Token::Or))
329                } else {
330                    return Err(CompilerError::parse_error(
331                        self.line,
332                        self.column,
333                        "Unexpected character: |",
334                    ));
335                }
336            }
337            '(' => Ok(Some(Token::LeftParen)),
338            ')' => Ok(Some(Token::RightParen)),
339            '{' => Ok(Some(Token::LeftBrace)),
340            '}' => Ok(Some(Token::RightBrace)),
341            '[' => Ok(Some(Token::LeftBracket)),
342            ']' => Ok(Some(Token::RightBracket)),
343            ';' => Ok(Some(Token::Semicolon)),
344            ',' => Ok(Some(Token::Comma)),
345            '.' => Ok(Some(Token::Dot)),
346            ':' => Ok(Some(Token::Colon)),
347            '?' => Ok(Some(Token::QuestionMark)),
348            '"' | '\'' => Ok(self.parse_string()?),
349            '`' => Ok(self.parse_template_literal()?),
350            '0'..='9' => Ok(self.parse_number()?),
351            'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
352            _ => {
353                return Err(CompilerError::parse_error(
354                    self.line,
355                    self.column,
356                    format!("Unexpected character: {}", ch),
357                ));
358            }
359        };
360
361        // Only advance for simple tokens that don't manage position themselves
362        match ch {
363            'a'..='z' | 'A'..='Z' | '_' | '$' => {
364                // parse_identifier_or_keyword manages position itself
365            }
366            '0'..='9' => {
367                // parse_number manages position itself
368            }
369            '"' | '\'' => {
370                // parse_string manages position itself
371            }
372            _ => {
373                // Simple tokens need to advance
374                self.advance();
375            }
376        }
377        token
378    }
379
380    /// Get current character
381    fn current_char(&self) -> char {
382        self.input.chars().nth(self.position).unwrap_or('\0')
383    }
384
385    /// Peek at next character
386    fn peek_char(&self) -> Option<char> {
387        self.input.chars().nth(self.position + 1)
388    }
389
390    /// Advance position
391    fn advance(&mut self) {
392        if self.current_char() == '\n' {
393            self.line += 1;
394            self.column = 1;
395        } else {
396            self.column += 1;
397        }
398        self.position += 1;
399    }
400
401    /// Skip whitespace
402    fn skip_whitespace(&mut self) {
403        while self.position < self.input.len() {
404            let ch = self.current_char();
405            if ch.is_whitespace() {
406                self.advance();
407            } else if ch == '/' && self.peek_char() == Some('/') {
408                // Skip line comment
409                self.advance(); // skip first /
410                self.advance(); // skip second /
411                while self.position < self.input.len() && self.current_char() != '\n' {
412                    self.advance();
413                }
414            } else if ch == '/' && self.peek_char() == Some('*') {
415                // Skip block comment
416                self.advance(); // skip /
417                self.advance(); // skip *
418                while self.position < self.input.len() {
419                    if self.current_char() == '*' && self.peek_char() == Some('/') {
420                        self.advance(); // skip *
421                        self.advance(); // skip /
422                        break;
423                    }
424                    self.advance();
425                }
426            } else {
427                break;
428            }
429        }
430    }
431
432    /// Skip line comment
433    fn skip_line_comment(&mut self) -> Option<Token> {
434        while self.position < self.input.len() && self.current_char() != '\n' {
435            self.advance();
436        }
437        None
438    }
439
440    /// Skip block comment
441    fn skip_block_comment(&mut self) -> Option<Token> {
442        while self.position < self.input.len() {
443            if self.current_char() == '*' && self.peek_char() == Some('/') {
444                self.advance();
445                self.advance();
446                break;
447            }
448            self.advance();
449        }
450        None
451    }
452
453    /// Parse string literal
454    fn parse_string(&mut self) -> Result<Option<Token>> {
455        let quote = self.current_char();
456        let mut value = String::new();
457        self.advance();
458
459        while self.position < self.input.len() {
460            let ch = self.current_char();
461            if ch == quote {
462                self.advance();
463                return Ok(Some(Token::String(value)));
464            } else if ch == '\\' {
465                self.advance();
466                if self.position < self.input.len() {
467                    let escaped = self.current_char();
468                    value.push(match escaped {
469                        'n' => '\n',
470                        't' => '\t',
471                        'r' => '\r',
472                        '\\' => '\\',
473                        '"' => '"',
474                        '\'' => '\'',
475                        _ => escaped,
476                    });
477                    self.advance();
478                }
479            } else {
480                value.push(ch);
481                self.advance();
482            }
483        }
484
485        Err(CompilerError::parse_error(
486            self.line,
487            self.column,
488            "Unterminated string literal",
489        ))
490    }
491
492    /// Parse template literal
493    fn parse_template_literal(&mut self) -> Result<Option<Token>> {
494        let mut value = String::new();
495        self.advance(); // consume opening backtick
496
497        while self.position < self.input.len() {
498            let ch = self.current_char();
499            if ch == '`' {
500                self.advance();
501                return Ok(Some(Token::TemplateLiteral(value)));
502            } else if ch == '\\' {
503                self.advance();
504                if self.position < self.input.len() {
505                    let escaped = self.current_char();
506                    value.push(match escaped {
507                        'n' => '\n',
508                        't' => '\t',
509                        'r' => '\r',
510                        '\\' => '\\',
511                        '`' => '`',
512                        '$' => '$',
513                        _ => escaped,
514                    });
515                    self.advance();
516                }
517            } else {
518                value.push(ch);
519                self.advance();
520            }
521        }
522
523        Err(CompilerError::parse_error(
524            self.line,
525            self.column,
526            "Unterminated template literal",
527        ))
528    }
529
530    /// Parse number literal
531    fn parse_number(&mut self) -> Result<Option<Token>> {
532        let mut value = String::new();
533        let mut has_dot = false;
534
535        while self.position < self.input.len() {
536            let ch = self.current_char();
537            if ch.is_ascii_digit() {
538                value.push(ch);
539                self.advance();
540            } else if ch == '.' && !has_dot {
541                has_dot = true;
542                value.push(ch);
543                self.advance();
544            } else {
545                break;
546            }
547        }
548
549        let number: f64 = value.parse().map_err(|_| {
550            CompilerError::parse_error(self.line, self.column, "Invalid number literal")
551        })?;
552
553        Ok(Some(Token::Number(number)))
554    }
555
556    /// Parse identifier or keyword
557    fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
558        let mut value = String::new();
559
560        while self.position < self.input.len() {
561            let ch = self.current_char();
562            if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
563                value.push(ch);
564                self.advance();
565            } else {
566                break;
567            }
568        }
569
570        // Check if it's a keyword
571        if let Some(keyword) = self.parse_keyword(&value) {
572            Ok(Some(Token::Keyword(keyword)))
573        } else {
574            Ok(Some(Token::Identifier(value)))
575        }
576    }
577
578    /// Parse keyword from string
579    fn parse_keyword(&self, value: &str) -> Option<Keyword> {
580        match value {
581            "let" => Some(Keyword::Let),
582            "const" => Some(Keyword::Const),
583            "var" => Some(Keyword::Var),
584            "function" => Some(Keyword::Function),
585            "class" => Some(Keyword::Class),
586            "interface" => Some(Keyword::Interface),
587            "type" => Some(Keyword::Type),
588            "enum" => Some(Keyword::Enum),
589            "namespace" => Some(Keyword::Namespace),
590            "module" => Some(Keyword::Module),
591            "import" => Some(Keyword::Import),
592            "export" => Some(Keyword::Export),
593            "from" => Some(Keyword::From),
594            "as" => Some(Keyword::As),
595            "default" => Some(Keyword::Default),
596            "if" => Some(Keyword::If),
597            "else" => Some(Keyword::Else),
598            "switch" => Some(Keyword::Switch),
599            "case" => Some(Keyword::Case),
600            "for" => Some(Keyword::For),
601            "while" => Some(Keyword::While),
602            "do" => Some(Keyword::Do),
603            "break" => Some(Keyword::Break),
604            "continue" => Some(Keyword::Continue),
605            "return" => Some(Keyword::Return),
606            "throw" => Some(Keyword::Throw),
607            "try" => Some(Keyword::Try),
608            "catch" => Some(Keyword::Catch),
609            "finally" => Some(Keyword::Finally),
610            "extends" => Some(Keyword::Extends),
611            "implements" => Some(Keyword::Implements),
612            "super" => Some(Keyword::Super),
613            "this" => Some(Keyword::This),
614            "new" => Some(Keyword::New),
615            "static" => Some(Keyword::Static),
616            "public" => Some(Keyword::Public),
617            "private" => Some(Keyword::Private),
618            "protected" => Some(Keyword::Protected),
619            "abstract" => Some(Keyword::Abstract),
620            "readonly" => Some(Keyword::Readonly),
621            "async" => Some(Keyword::Async),
622            "await" => Some(Keyword::Await),
623            "Promise" => Some(Keyword::Promise),
624            "any" => Some(Keyword::Any),
625            "unknown" => Some(Keyword::Unknown),
626            "never" => Some(Keyword::Never),
627            "void" => Some(Keyword::Void),
628            "null" => Some(Keyword::Null),
629            "undefined" => Some(Keyword::Undefined),
630            "boolean" => Some(Keyword::Boolean),
631            "number" => Some(Keyword::Number),
632            "string" => Some(Keyword::String),
633            "object" => Some(Keyword::Object),
634            "Array" => Some(Keyword::Array),
635            "true" => Some(Keyword::True),
636            "false" => Some(Keyword::False),
637            "in" => Some(Keyword::In),
638            "of" => Some(Keyword::Of),
639            "instanceof" => Some(Keyword::Instanceof),
640            "typeof" => Some(Keyword::Typeof),
641            "keyof" => Some(Keyword::Keyof),
642            "is" => Some(Keyword::Is),
643            "asserts" => Some(Keyword::Asserts),
644            "infer" => Some(Keyword::Infer),
645            "declare" => Some(Keyword::Declare),
646            "global" => Some(Keyword::Global),
647            _ => None,
648        }
649    }
650}