TypeScript_Rust_Compiler/
lexer.rs

1//! Lexical analysis for TypeScript code
2
3use crate::error::{CompilerError, Result};
4use serde::{Deserialize, Serialize};
5
/// Token types for TypeScript
///
/// This is the unit of output of `Lexer::tokenize`. Not every variant is
/// produced by the lexer in this file; those that are not are marked below.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Token {
    // Literals
    /// Numeric literal, stored as an `f64`.
    Number(f64),
    /// Contents of a single- or double-quoted string, with the quotes removed
    /// and backslash escapes already resolved.
    String(String),
    /// Text between a pair of backticks, with backslash escapes resolved.
    /// `${...}` substitutions are kept as plain text.
    TemplateLiteral(String),
    /// Boolean literal. The lexer in this file emits `Keyword::True` /
    /// `Keyword::False` for `true` / `false` rather than this variant.
    Boolean(bool),
    /// `null` literal. The lexer in this file emits `Keyword::Null` instead.
    Null,
    /// `undefined` literal. The lexer in this file emits `Keyword::Undefined`
    /// instead.
    Undefined,

    // Identifiers and keywords
    /// Any identifier-shaped word that is not recognised as a keyword.
    Identifier(String),
    /// A recognised keyword; see [`Keyword`].
    Keyword(Keyword),

    // Operators
    /// `+` (the lexer in this file also emits this single token for `++`).
    Plus,
    /// `-`
    Minus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `==`
    Equal,
    /// `!=`
    NotEqual,
    /// `===`
    StrictEqual,
    /// `!==`
    StrictNotEqual,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessEqual,
    /// `>=`
    GreaterEqual,
    /// `&&`
    And,
    /// `||`
    Or,
    /// `!`
    Not,
    /// `=`
    Assign,
    /// `+=`
    PlusAssign,
    /// `-=`
    MinusAssign,
    /// `*=`
    MultiplyAssign,
    /// `/=`
    DivideAssign,

    // Delimiters
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `:`
    Colon,
    /// `?`
    QuestionMark,
    /// Emitted by the lexer in this file for the two-character sequence `->`.
    Arrow,

    // Type annotations
    // None of these three variants is produced by the lexer in this file.
    TypeAnnotation,
    GenericStart,
    GenericEnd,

    // Special
    // `Newline`, `Whitespace` and `Comment` are not produced by the lexer in
    // this file: whitespace and comments are skipped instead.
    Newline,
    Whitespace,
    Comment(String),
    /// End-of-input marker; always the last token returned by
    /// `Lexer::tokenize`.
    EOF,
}
69
/// TypeScript keywords
///
/// Carried inside `Token::Keyword`. `Lexer::parse_keyword` maps a source
/// spelling (for example `"let"` or `"instanceof"`) to one of these variants.
/// Some variants have no spelling in that table and are therefore never
/// produced by the lexer in this file; they are marked below.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Keyword {
    // Declarations
    Let,
    Const,
    Var,
    Function,
    Class,
    Interface,
    Type,
    Enum,
    Namespace,
    Module,
    Import,
    Export,
    From,
    As,
    /// Produced for the word `default` wherever it appears.
    Default,

    // Control flow
    If,
    Else,
    Switch,
    Case,
    /// Not produced by the lexer in this file; `default` always lexes as
    /// `Keyword::Default`.
    DefaultCase,
    For,
    While,
    Do,
    Break,
    Continue,
    Return,
    Throw,
    Try,
    Catch,
    Finally,

    // OOP
    Extends,
    Implements,
    Super,
    This,
    New,
    Static,
    Public,
    Private,
    Protected,
    Abstract,
    Readonly,

    // Async
    Async,
    Await,
    /// Produced for the capitalised word `Promise`.
    Promise,

    // Types
    Any,
    Unknown,
    Never,
    Void,
    Null,
    Undefined,
    Boolean,
    Number,
    String,
    Object,
    /// Produced for the capitalised word `Array`.
    Array,
    // The remaining variants in this group have no spelling in
    // `Lexer::parse_keyword` and are not produced by the lexer in this file.
    Tuple,
    Union,
    Intersection,
    Literal,
    Mapped,
    Conditional,
    Template,

    // Utility types
    // No variant in this group has a spelling in `Lexer::parse_keyword`, so
    // names such as `Partial` or `Record` lex as `Token::Identifier`.
    Partial,
    Required,
    Pick,
    Omit,
    Record,
    Exclude,
    Extract,
    NonNullable,
    Parameters,
    ReturnType,
    InstanceType,
    ThisParameterType,
    OmitThisParameter,
    ThisType,

    // Other
    True,
    False,
    In,
    Of,
    Instanceof,
    Typeof,
    Keyof,
    Is,
    Asserts,
    Infer,
    Declare,
    /// Not produced by the lexer in this file (no spelling in
    /// `Lexer::parse_keyword`).
    Ambient,
    Global,
}
176
/// Lexer for TypeScript code
///
/// Owns the source text and a cursor into it. Build one with `Lexer::new` and
/// call `tokenize` to obtain the token stream.
pub struct Lexer {
    /// The complete source text being scanned.
    input: String,
    /// Offset of the next unread character in `input`; starts at 0.
    position: usize,
    /// 1-based line of the cursor; incremented each time a `'\n'` is consumed.
    line: usize,
    /// 1-based column of the cursor; reset to 1 after a `'\n'` and incremented
    /// for every other consumed character.
    column: usize,
}
184
185impl Lexer {
186    /// Create a new lexer
187    pub fn new(input: String) -> Self {
188        Self {
189            input,
190            position: 0,
191            line: 1,
192            column: 1,
193        }
194    }
195
196    /// Tokenize the input string
197    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
198        let mut tokens = Vec::new();
199
200        while self.position < self.input.len() {
201            match self.next_token()? {
202                Some(token) => {
203                    println!("Token: {:?}", token);
204                    tokens.push(token);
205                },
206                None => break,
207            }
208        }
209
210        tokens.push(Token::EOF);
211        Ok(tokens)
212    }
213
214    /// Get the next token
215    fn next_token(&mut self) -> Result<Option<Token>> {
216        self.skip_whitespace();
217
218        if self.position >= self.input.len() {
219            return Ok(None);
220        }
221
222        let ch = self.current_char();
223        let token = match ch {
224            '+' => {
225                if self.peek_char() == Some('=') {
226                    self.advance();
227                    Ok(Some(Token::PlusAssign))
228                } else if self.peek_char() == Some('+') {
229                    self.advance();
230                    Ok(Some(Token::Plus)) // ++ operator
231                } else {
232                    Ok(Some(Token::Plus))
233                }
234            }
235            '-' => {
236                if self.peek_char() == Some('=') {
237                    self.advance();
238                    Ok(Some(Token::MinusAssign))
239                } else if self.peek_char() == Some('>') {
240                    self.advance();
241                    Ok(Some(Token::Arrow))
242                } else {
243                    Ok(Some(Token::Minus))
244                }
245            }
246            '*' => {
247                if self.peek_char() == Some('=') {
248                    self.advance();
249                    Ok(Some(Token::MultiplyAssign))
250                } else {
251                    Ok(Some(Token::Multiply))
252                }
253            }
254            '/' => {
255                if self.peek_char() == Some('=') {
256                    self.advance();
257                    Ok(Some(Token::DivideAssign))
258                } else if self.peek_char() == Some('/') {
259                    self.advance();
260                    self.skip_line_comment();
261                    Ok(None)
262                } else if self.peek_char() == Some('*') {
263                    self.advance();
264                    self.skip_block_comment();
265                    Ok(None)
266                } else {
267                    Ok(Some(Token::Divide))
268                }
269            }
270            '%' => Ok(Some(Token::Modulo)),
271            '=' => {
272                if self.peek_char() == Some('=') {
273                    self.advance();
274                    if self.peek_char() == Some('=') {
275                        self.advance();
276                        Ok(Some(Token::StrictEqual))
277                    } else {
278                        Ok(Some(Token::Equal))
279                    }
280                } else {
281                    Ok(Some(Token::Assign))
282                }
283            }
284            '!' => {
285                if self.peek_char() == Some('=') {
286                    self.advance();
287                    if self.peek_char() == Some('=') {
288                        self.advance();
289                        Ok(Some(Token::StrictNotEqual))
290                    } else {
291                        Ok(Some(Token::NotEqual))
292                    }
293                } else {
294                    Ok(Some(Token::Not))
295                }
296            }
297            '<' => {
298                if self.peek_char() == Some('=') {
299                    self.advance();
300                    Ok(Some(Token::LessEqual))
301                } else {
302                    Ok(Some(Token::LessThan))
303                }
304            }
305            '>' => {
306                if self.peek_char() == Some('=') {
307                    self.advance();
308                    Ok(Some(Token::GreaterEqual))
309                } else {
310                    Ok(Some(Token::GreaterThan))
311                }
312            }
313            '&' => {
314                if self.peek_char() == Some('&') {
315                    self.advance();
316                    Ok(Some(Token::And))
317                } else {
318                    return Err(CompilerError::parse_error(
319                        self.line,
320                        self.column,
321                        "Unexpected character: &",
322                    ));
323                }
324            }
325            '|' => {
326                if self.peek_char() == Some('|') {
327                    self.advance();
328                    Ok(Some(Token::Or))
329                } else {
330                    return Err(CompilerError::parse_error(
331                        self.line,
332                        self.column,
333                        "Unexpected character: |",
334                    ));
335                }
336            }
337            '(' => Ok(Some(Token::LeftParen)),
338            ')' => Ok(Some(Token::RightParen)),
339            '{' => Ok(Some(Token::LeftBrace)),
340            '}' => Ok(Some(Token::RightBrace)),
341            '[' => Ok(Some(Token::LeftBracket)),
342            ']' => Ok(Some(Token::RightBracket)),
343            ';' => Ok(Some(Token::Semicolon)),
344            ',' => Ok(Some(Token::Comma)),
345            '.' => Ok(Some(Token::Dot)),
346            ':' => Ok(Some(Token::Colon)),
347            '?' => Ok(Some(Token::QuestionMark)),
348            '"' | '\'' => Ok(self.parse_string()?),
349            '`' => Ok(self.parse_template_literal()?),
350            '0'..='9' => Ok(self.parse_number()?),
351            'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.parse_identifier_or_keyword()?),
352            _ => {
353                return Err(CompilerError::parse_error(
354                    self.line,
355                    self.column,
356                    format!("Unexpected character: {}", ch),
357                ));
358            }
359        };
360
361        // Only advance for simple tokens that don't manage position themselves
362        match ch {
363            'a'..='z' | 'A'..='Z' | '_' | '$' => {
364                // parse_identifier_or_keyword manages position itself
365            }
366            '0'..='9' => {
367                // parse_number manages position itself  
368            }
369            '"' | '\'' => {
370                // parse_string manages position itself
371            }
372            _ => {
373                // Simple tokens need to advance
374                self.advance();
375            }
376        }
377        token
378    }
379
380    /// Get current character
381    fn current_char(&self) -> char {
382        self.input.chars().nth(self.position).unwrap_or('\0')
383    }
384
385    /// Peek at next character
386    fn peek_char(&self) -> Option<char> {
387        self.input.chars().nth(self.position + 1)
388    }
389
390    /// Advance position
391    fn advance(&mut self) {
392        if self.current_char() == '\n' {
393            self.line += 1;
394            self.column = 1;
395        } else {
396            self.column += 1;
397        }
398        self.position += 1;
399    }
400
401    /// Skip whitespace
402    fn skip_whitespace(&mut self) {
403        while self.position < self.input.len() {
404            let ch = self.current_char();
405            if ch.is_whitespace() {
406                self.advance();
407            } else {
408                break;
409            }
410        }
411    }
412
413    /// Skip line comment
414    fn skip_line_comment(&mut self) -> Option<Token> {
415        while self.position < self.input.len() && self.current_char() != '\n' {
416            self.advance();
417        }
418        None
419    }
420
421    /// Skip block comment
422    fn skip_block_comment(&mut self) -> Option<Token> {
423        while self.position < self.input.len() {
424            if self.current_char() == '*' && self.peek_char() == Some('/') {
425                self.advance();
426                self.advance();
427                break;
428            }
429            self.advance();
430        }
431        None
432    }
433
434    /// Parse string literal
435    fn parse_string(&mut self) -> Result<Option<Token>> {
436        let quote = self.current_char();
437        let mut value = String::new();
438        self.advance();
439
440        while self.position < self.input.len() {
441            let ch = self.current_char();
442            if ch == quote {
443                self.advance();
444                return Ok(Some(Token::String(value)));
445            } else if ch == '\\' {
446                self.advance();
447                if self.position < self.input.len() {
448                    let escaped = self.current_char();
449                    value.push(match escaped {
450                        'n' => '\n',
451                        't' => '\t',
452                        'r' => '\r',
453                        '\\' => '\\',
454                        '"' => '"',
455                        '\'' => '\'',
456                        _ => escaped,
457                    });
458                    self.advance();
459                }
460            } else {
461                value.push(ch);
462                self.advance();
463            }
464        }
465
466        Err(CompilerError::parse_error(
467            self.line,
468            self.column,
469            "Unterminated string literal",
470        ))
471    }
472
473    /// Parse template literal
474    fn parse_template_literal(&mut self) -> Result<Option<Token>> {
475        let mut value = String::new();
476        self.advance(); // consume opening backtick
477
478        while self.position < self.input.len() {
479            let ch = self.current_char();
480            if ch == '`' {
481                self.advance();
482                return Ok(Some(Token::TemplateLiteral(value)));
483            } else if ch == '\\' {
484                self.advance();
485                if self.position < self.input.len() {
486                    let escaped = self.current_char();
487                    value.push(match escaped {
488                        'n' => '\n',
489                        't' => '\t',
490                        'r' => '\r',
491                        '\\' => '\\',
492                        '`' => '`',
493                        '$' => '$',
494                        _ => escaped,
495                    });
496                    self.advance();
497                }
498            } else {
499                value.push(ch);
500                self.advance();
501            }
502        }
503
504        Err(CompilerError::parse_error(
505            self.line,
506            self.column,
507            "Unterminated template literal",
508        ))
509    }
510
511    /// Parse number literal
512    fn parse_number(&mut self) -> Result<Option<Token>> {
513        let mut value = String::new();
514        let mut has_dot = false;
515
516        while self.position < self.input.len() {
517            let ch = self.current_char();
518            if ch.is_ascii_digit() {
519                value.push(ch);
520                self.advance();
521            } else if ch == '.' && !has_dot {
522                has_dot = true;
523                value.push(ch);
524                self.advance();
525            } else {
526                break;
527            }
528        }
529
530        let number: f64 = value.parse().map_err(|_| {
531            CompilerError::parse_error(self.line, self.column, "Invalid number literal")
532        })?;
533
534        Ok(Some(Token::Number(number)))
535    }
536
537    /// Parse identifier or keyword
538    fn parse_identifier_or_keyword(&mut self) -> Result<Option<Token>> {
539        let mut value = String::new();
540
541        while self.position < self.input.len() {
542            let ch = self.current_char();
543            if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
544                value.push(ch);
545                self.advance();
546            } else {
547                break;
548            }
549        }
550
551        // Check if it's a keyword
552        if let Some(keyword) = self.parse_keyword(&value) {
553            Ok(Some(Token::Keyword(keyword)))
554        } else {
555            Ok(Some(Token::Identifier(value)))
556        }
557    }
558
559    /// Parse keyword from string
560    fn parse_keyword(&self, value: &str) -> Option<Keyword> {
561        match value {
562            "let" => Some(Keyword::Let),
563            "const" => Some(Keyword::Const),
564            "var" => Some(Keyword::Var),
565            "function" => Some(Keyword::Function),
566            "class" => Some(Keyword::Class),
567            "interface" => Some(Keyword::Interface),
568            "type" => Some(Keyword::Type),
569            "enum" => Some(Keyword::Enum),
570            "namespace" => Some(Keyword::Namespace),
571            "module" => Some(Keyword::Module),
572            "import" => Some(Keyword::Import),
573            "export" => Some(Keyword::Export),
574            "from" => Some(Keyword::From),
575            "as" => Some(Keyword::As),
576            "default" => Some(Keyword::Default),
577            "if" => Some(Keyword::If),
578            "else" => Some(Keyword::Else),
579            "switch" => Some(Keyword::Switch),
580            "case" => Some(Keyword::Case),
581            "for" => Some(Keyword::For),
582            "while" => Some(Keyword::While),
583            "do" => Some(Keyword::Do),
584            "break" => Some(Keyword::Break),
585            "continue" => Some(Keyword::Continue),
586            "return" => Some(Keyword::Return),
587            "throw" => Some(Keyword::Throw),
588            "try" => Some(Keyword::Try),
589            "catch" => Some(Keyword::Catch),
590            "finally" => Some(Keyword::Finally),
591            "extends" => Some(Keyword::Extends),
592            "implements" => Some(Keyword::Implements),
593            "super" => Some(Keyword::Super),
594            "this" => Some(Keyword::This),
595            "new" => Some(Keyword::New),
596            "static" => Some(Keyword::Static),
597            "public" => Some(Keyword::Public),
598            "private" => Some(Keyword::Private),
599            "protected" => Some(Keyword::Protected),
600            "abstract" => Some(Keyword::Abstract),
601            "readonly" => Some(Keyword::Readonly),
602            "async" => Some(Keyword::Async),
603            "await" => Some(Keyword::Await),
604            "Promise" => Some(Keyword::Promise),
605            "any" => Some(Keyword::Any),
606            "unknown" => Some(Keyword::Unknown),
607            "never" => Some(Keyword::Never),
608            "void" => Some(Keyword::Void),
609            "null" => Some(Keyword::Null),
610            "undefined" => Some(Keyword::Undefined),
611            "boolean" => Some(Keyword::Boolean),
612            "number" => Some(Keyword::Number),
613            "string" => Some(Keyword::String),
614            "object" => Some(Keyword::Object),
615            "Array" => Some(Keyword::Array),
616            "true" => Some(Keyword::True),
617            "false" => Some(Keyword::False),
618            "in" => Some(Keyword::In),
619            "of" => Some(Keyword::Of),
620            "instanceof" => Some(Keyword::Instanceof),
621            "typeof" => Some(Keyword::Typeof),
622            "keyof" => Some(Keyword::Keyof),
623            "is" => Some(Keyword::Is),
624            "asserts" => Some(Keyword::Asserts),
625            "infer" => Some(Keyword::Infer),
626            "declare" => Some(Keyword::Declare),
627            "global" => Some(Keyword::Global),
628            _ => None,
629        }
630    }
631}