Skip to main content

pipa/compiler/
lexer.rs

/// Category assigned to every token the lexer produces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    /// End-of-input marker. `read_comment_or_regex` returns it when nothing follows a comment,
    /// and `next_token_uncached` turns that into `None`.
    Eof,
    /// Identifier text that `is_keyword` does not recognise.
    Identifier,
    /// Identifier text that `is_keyword` recognises.
    Keyword,
    /// Numeric literal without a trailing `n`.
    Number,
    /// Numeric literal followed by `n`.
    BigInt,
    /// Quoted string literal; the token value holds the decoded contents without the quotes.
    String,
    /// Operator or delimiter.
    Punctuator,
    /// Not produced by the visible part of the lexer — presumably a legacy or parser-side
    /// template kind; TODO confirm.
    Template,
    /// Regular expression literal; the token value is `pattern/flags`.
    Regex,
    /// `#!` line at the very start of the source.
    Hashbang,
    /// `#name` private identifier; the token value keeps the leading `#`.
    PrivateIdentifier,

    /// Backtick template segment that ended without an interpolation.
    NoSubstitutionTemplate,

    /// Backtick template segment that ended at an interpolation.
    TemplateHead,

    /// Not produced by the visible part of the lexer — presumably a segment between two
    /// interpolations; TODO confirm.
    TemplateMiddle,

    /// Not produced by the visible part of the lexer — presumably the segment that closes a
    /// template after an interpolation; TODO confirm.
    TemplateTail,
}
23
/// A single lexed token.
#[derive(Debug, Clone)]
pub struct Token {
    /// Category of the token.
    pub token_type: TokenType,
    /// Token text. Strings and identifiers hold their decoded contents, not the raw source.
    pub value: String,
    /// Lexer line counter at the moment the token was built. The readers in this file build
    /// the token after consuming its text, so this is the line where the token ends.
    pub line: u32,
    /// Lexer column counter at the moment the token was built (see `line`).
    pub column: u32,
}
31
/// What the previously lexed token implies about a following `/`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LastTokenKind {
    /// No previous token: the initial state, and the state after `set_pos`.
    /// A following `/` is lexed as the start of a regex.
    None,

    /// The previous token can end an operand, so a following `/` is lexed as `/` or `/=`.
    Dividend,

    /// The previous token cannot end an operand, so a following `/` starts a regex literal.
    RegexPrefix,
}
40
/// One token of lookahead remembered by `peek`, together with the lexer state on both sides
/// of it, so that `next_token` can replay the token instead of scanning it again.
#[derive(Debug, Clone)]
struct PeekCache {
    // Lexer state captured before the lookahead scan; the cache is only valid while the
    // lexer is still in exactly this state.
    pre_pos: usize,
    pre_line: u32,
    pre_column: u32,
    pre_last_token_kind: LastTokenKind,
    // Lexer state captured after the lookahead scan; restored when the cached token is consumed.
    post_pos: usize,
    post_line: u32,
    post_column: u32,
    post_last_token_kind: LastTokenKind,
    // The token that was scanned.
    token: Token,
}
53
/// Hand-written lexer over a source text held as a vector of characters.
pub struct Lexer {
    /// The whole input, one `char` per element.
    pub source: Vec<char>,
    /// Index into `source` of the next character to read.
    pub pos: usize,
    /// Current line counter; starts at 1.
    pub line: u32,
    /// Current column counter; starts at 1 and is reset to 1 after a line break.
    pub column: u32,

    /// Decides whether the next `/` is a division operator or the start of a regex.
    last_token_kind: LastTokenKind,

    /// Lookahead filled by `peek`, consumed by `next_token`, and cleared by `set_pos`.
    cached_peek: Option<PeekCache>,

    /// Reset to `false` when `read_string` starts and set to `true` as soon as that string
    /// contains a backslash escape.
    pub last_string_had_escape: bool,
}
66
67impl Lexer {
68    pub fn new(source: &str) -> Self {
69        Lexer {
70            source: source.chars().collect(),
71            pos: 0,
72            line: 1,
73            column: 1,
74            last_token_kind: LastTokenKind::None,
75            cached_peek: None,
76            last_string_had_escape: false,
77        }
78    }
79
80    pub fn next_token(&mut self) -> Option<Token> {
81        if let Some(cache) = self.cached_peek.take() {
82            if cache.pre_pos == self.pos
83                && cache.pre_line == self.line
84                && cache.pre_column == self.column
85                && cache.pre_last_token_kind == self.last_token_kind
86            {
87                self.pos = cache.post_pos;
88                self.line = cache.post_line;
89                self.column = cache.post_column;
90                self.last_token_kind = cache.post_last_token_kind;
91                return Some(cache.token);
92            }
93        }
94
95        self.next_token_uncached()
96    }
97
98    fn next_token_uncached(&mut self) -> Option<Token> {
99        self.skip_whitespace()?;
100        if self.pos >= self.source.len() {
101            return None;
102        }
103
104        if self.pos == 0 && self.source[self.pos] == '#' {
105            if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '!' {
106                let token = self.read_hashbang();
107                self.last_token_kind = LastTokenKind::RegexPrefix;
108                return Some(token);
109            }
110        }
111
112        let c = self.source[self.pos];
113
114        if c.is_ascii_digit() {
115            let token = self.read_number();
116            self.last_token_kind = LastTokenKind::Dividend;
117            return Some(token);
118        }
119
120        if c == '.'
121            && self.pos + 1 < self.source.len()
122            && self.source[self.pos + 1].is_ascii_digit()
123        {
124            let token = self.read_number();
125            self.last_token_kind = LastTokenKind::Dividend;
126            return Some(token);
127        }
128
129        if c == '"' || c == '\'' {
130            let token = self.read_string(c);
131            self.last_token_kind = LastTokenKind::Dividend;
132            return Some(token);
133        }
134
135        if c == '`' {
136            self.advance();
137            let (value, terminated_by_interp) = self.scan_template_segment();
138            self.last_token_kind = LastTokenKind::Dividend;
139            let token_type = if terminated_by_interp {
140                TokenType::TemplateHead
141            } else {
142                TokenType::NoSubstitutionTemplate
143            };
144            return Some(Token {
145                token_type,
146                value,
147                line: self.line,
148                column: self.column,
149            });
150        }
151
152        if Self::is_identifier_start(c) || c == '\\' {
153            let token = self.read_identifier();
154            self.last_token_kind = if matches!(
155                token.value.as_str(),
156                "return"
157                    | "throw"
158                    | "case"
159                    | "typeof"
160                    | "void"
161                    | "new"
162                    | "delete"
163                    | "in"
164                    | "instanceof"
165                    | "yield"
166            ) {
167                LastTokenKind::RegexPrefix
168            } else {
169                LastTokenKind::Dividend
170            };
171            return Some(token);
172        }
173
174        if c == '#' && self.pos > 0 {
175            if self.pos + 1 < self.source.len() {
176                let next = self.source[self.pos + 1];
177                if Self::is_identifier_start(next) || next == '\\' {
178                    let token = self.read_private_identifier();
179                    self.last_token_kind = LastTokenKind::Dividend;
180                    return Some(token);
181                }
182            }
183        }
184
185        if c == '/' {
186            let token = self.read_comment_or_regex();
187            if token.token_type == TokenType::Eof {
188                return None;
189            }
190            return Some(token);
191        }
192
193        let token = self.read_punctuator();
194        self.last_token_kind = match token.value.as_str() {
195            ")" | "]" | "}" | "++" | "--" => LastTokenKind::Dividend,
196            _ => LastTokenKind::RegexPrefix,
197        };
198        Some(token)
199    }
200
201    pub fn set_pos(&mut self, pos: usize) {
202        self.pos = pos;
203
204        self.cached_peek = None;
205        self.last_token_kind = LastTokenKind::None;
206    }
207
    /// Returns the index into `source` of the next character to read.
    pub fn pos(&self) -> usize {
        self.pos
    }
211
    /// Returns the current column counter.
    pub fn column(&self) -> u32 {
        self.column
    }
215
    /// Overwrites the current column counter; nothing else is changed.
    pub fn set_column(&mut self, col: u32) {
        self.column = col;
    }
219
    /// Returns the state that decides whether the next `/` is division or a regex start.
    pub fn last_token_kind(&self) -> LastTokenKind {
        self.last_token_kind
    }
223
    /// Overrides the state that decides whether the next `/` is division or a regex start.
    pub fn set_last_token_kind(&mut self, kind: LastTokenKind) {
        self.last_token_kind = kind;
    }
227
    /// Returns the current line counter.
    pub fn line(&self) -> u32 {
        self.line
    }
231
    /// Overwrites the current line counter; nothing else is changed.
    pub fn set_line(&mut self, line: u32) {
        self.line = line;
    }
235
236    pub fn get_current_line(&self) -> String {
237        let mut line_start = self.pos;
238        while line_start > 0 && self.source[line_start - 1] != '\n' {
239            line_start -= 1;
240        }
241
242        let mut line_end = self.pos;
243        while line_end < self.source.len() && self.source[line_end] != '\n' {
244            line_end += 1;
245        }
246
247        self.source[line_start..line_end].iter().collect()
248    }
249
250    pub fn peek(&mut self) -> Option<Token> {
251        if let Some(cache) = self.cached_peek.as_ref() {
252            if cache.pre_pos == self.pos
253                && cache.pre_line == self.line
254                && cache.pre_column == self.column
255                && cache.pre_last_token_kind == self.last_token_kind
256            {
257                return Some(cache.token.clone());
258            }
259        }
260
261        let old_pos = self.pos;
262        let old_line = self.line;
263        let old_column = self.column;
264        let old_last_token_kind = self.last_token_kind;
265
266        self.cached_peek = None;
267        let result = self.next_token_uncached();
268
269        let new_pos = self.pos;
270        let new_line = self.line;
271        let new_column = self.column;
272        let new_last_token_kind = self.last_token_kind;
273
274        self.pos = old_pos;
275        self.line = old_line;
276        self.column = old_column;
277        self.last_token_kind = old_last_token_kind;
278
279        if let Some(token) = result {
280            self.cached_peek = Some(PeekCache {
281                pre_pos: old_pos,
282                pre_line: old_line,
283                pre_column: old_column,
284                pre_last_token_kind: old_last_token_kind,
285                post_pos: new_pos,
286                post_line: new_line,
287                post_column: new_column,
288                post_last_token_kind: new_last_token_kind,
289                token: token.clone(),
290            });
291            return Some(token);
292        }
293
294        None
295    }
296
297    fn skip_whitespace(&mut self) -> Option<()> {
298        while self.pos < self.source.len() {
299            match self.source[self.pos] {
300                '\n' | '\r' | '\u{2028}' | '\u{2029}' => {
301                    self.line += 1;
302                    self.column = 1;
303                    self.pos += 1;
304                }
305                ' ' | '\t' | '\u{000B}' | '\u{000C}' | '\u{00A0}' | '\u{1680}' | '\u{202F}'
306                | '\u{205F}' | '\u{3000}' | '\u{FEFF}' => {
307                    self.column += 1;
308                    self.pos += 1;
309                }
310                c if (c as u32) >= 0x2000 && (c as u32) <= 0x200A => {
311                    self.column += 1;
312                    self.pos += 1;
313                }
314                _ => break,
315            }
316        }
317        Some(())
318    }
319
320    fn advance(&mut self) {
321        if self.pos < self.source.len() {
322            if self.source[self.pos] == '\n' || self.source[self.pos] == '\r' {
323                self.line += 1;
324                self.column = 1;
325            } else {
326                self.column += 1;
327            }
328            self.pos += 1;
329        }
330    }
331
332    fn read_number(&mut self) -> Token {
333        let start = self.pos;
334
335        if self.source[self.pos] == '0'
336            && self.pos + 1 < self.source.len()
337            && (self.source[self.pos + 1] == 'x' || self.source[self.pos + 1] == 'X')
338        {
339            self.advance();
340            self.advance();
341            while self.pos < self.source.len() {
342                if self.source[self.pos].is_ascii_hexdigit() {
343                    self.advance();
344                } else if self.source[self.pos] == '_' {
345                    if self.pos + 1 < self.source.len()
346                        && (self.source[self.pos + 1].is_ascii_hexdigit())
347                    {
348                        self.advance();
349                    } else {
350                        break;
351                    }
352                } else {
353                    break;
354                }
355            }
356            let is_bigint = self.pos < self.source.len() && self.source[self.pos] == 'n';
357            if is_bigint {
358                self.advance();
359            }
360            let value: String = self.source[start..self.pos].iter().collect();
361            return Token {
362                token_type: if is_bigint {
363                    TokenType::BigInt
364                } else {
365                    TokenType::Number
366                },
367                value,
368                line: self.line,
369                column: self.column,
370            };
371        }
372
373        if self.source[self.pos] == '0'
374            && self.pos + 1 < self.source.len()
375            && (self.source[self.pos + 1] == 'o' || self.source[self.pos + 1] == 'O')
376        {
377            self.advance();
378            self.advance();
379            while self.pos < self.source.len() {
380                if matches!(self.source[self.pos], '0'..='7') {
381                    self.advance();
382                } else if self.source[self.pos] == '_' {
383                    if self.pos + 1 < self.source.len()
384                        && matches!(self.source[self.pos + 1], '0'..='7')
385                    {
386                        self.advance();
387                    } else {
388                        break;
389                    }
390                } else {
391                    break;
392                }
393            }
394            let is_bigint = self.pos < self.source.len() && self.source[self.pos] == 'n';
395            if is_bigint {
396                self.advance();
397            }
398            let value: String = self.source[start..self.pos].iter().collect();
399            return Token {
400                token_type: if is_bigint {
401                    TokenType::BigInt
402                } else {
403                    TokenType::Number
404                },
405                value,
406                line: self.line,
407                column: self.column,
408            };
409        }
410
411        if self.source[self.pos] == '0'
412            && self.pos + 1 < self.source.len()
413            && (self.source[self.pos + 1] == 'b' || self.source[self.pos + 1] == 'B')
414        {
415            self.advance();
416            self.advance();
417            while self.pos < self.source.len() {
418                if matches!(self.source[self.pos], '0' | '1') {
419                    self.advance();
420                } else if self.source[self.pos] == '_' {
421                    if self.pos + 1 < self.source.len()
422                        && matches!(self.source[self.pos + 1], '0' | '1')
423                    {
424                        self.advance();
425                    } else {
426                        break;
427                    }
428                } else {
429                    break;
430                }
431            }
432            let is_bigint = self.pos < self.source.len() && self.source[self.pos] == 'n';
433            if is_bigint {
434                self.advance();
435            }
436            let value: String = self.source[start..self.pos].iter().collect();
437            return Token {
438                token_type: if is_bigint {
439                    TokenType::BigInt
440                } else {
441                    TokenType::Number
442                },
443                value,
444                line: self.line,
445                column: self.column,
446            };
447        }
448
449        let mut has_dot = false;
450        while self.pos < self.source.len() {
451            if self.source[self.pos].is_ascii_digit() {
452                self.advance();
453            } else if self.source[self.pos] == '_' {
454                if self.pos + 1 < self.source.len() && self.source[self.pos + 1].is_ascii_digit() {
455                    self.advance();
456                } else {
457                    break;
458                }
459            } else if self.source[self.pos] == '.' && !has_dot {
460                has_dot = true;
461                self.advance();
462            } else {
463                break;
464            }
465        }
466
467        if self.pos < self.source.len()
468            && (self.source[self.pos] == 'e' || self.source[self.pos] == 'E')
469        {
470            self.advance();
471            if self.pos < self.source.len()
472                && (self.source[self.pos] == '+' || self.source[self.pos] == '-')
473            {
474                self.advance();
475            }
476            while self.pos < self.source.len() {
477                if self.source[self.pos].is_ascii_digit() {
478                    self.advance();
479                } else if self.source[self.pos] == '_' {
480                    if self.pos + 1 < self.source.len()
481                        && self.source[self.pos + 1].is_ascii_digit()
482                    {
483                        self.advance();
484                    } else {
485                        break;
486                    }
487                } else {
488                    break;
489                }
490            }
491        }
492
493        let value_end = self.pos;
494        let is_bigint = self.pos < self.source.len() && self.source[self.pos] == 'n';
495        if is_bigint {
496            self.advance();
497        }
498        let value: String = self.source[start..value_end].iter().collect();
499        Token {
500            token_type: if is_bigint {
501                TokenType::BigInt
502            } else {
503                TokenType::Number
504            },
505            value,
506            line: self.line,
507            column: self.column,
508        }
509    }
510
511    pub fn get_context(&self, chars: usize) -> String {
512        let start = self.pos.saturating_sub(chars);
513        let end = (self.pos + chars).min(self.source.len());
514        self.source[start..end].iter().collect()
515    }
516
517    fn read_string(&mut self, quote: char) -> Token {
518        self.last_string_had_escape = false;
519        self.advance();
520        let mut value = String::new();
521        while self.pos < self.source.len() && self.source[self.pos] != quote {
522            let ch = self.source[self.pos];
523            if ch != '\\' {
524                value.push(ch);
525                self.advance();
526                continue;
527            }
528
529            self.advance();
530            if self.pos >= self.source.len() {
531                break;
532            }
533
534            self.last_string_had_escape = true;
535            let esc = self.source[self.pos];
536            match esc {
537                '\n' | '\u{2028}' | '\u{2029}' => {}
538                '\r' => {
539                    if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '\n' {
540                        self.advance();
541                    }
542                }
543                'n' => value.push('\n'),
544                'r' => value.push('\r'),
545                't' => value.push('\t'),
546                'b' => value.push('\x08'),
547                'f' => value.push('\x0c'),
548                'v' => value.push('\x0b'),
549                '0'..='7' => {
550                    let first_val = esc as u32 - '0' as u32;
551                    if first_val == 0
552                        && (self.pos + 1 >= self.source.len()
553                            || self.source[self.pos + 1] < '0'
554                            || self.source[self.pos + 1] > '7')
555                    {
556                        value.push('\0');
557                    } else {
558                        let mut code = first_val;
559                        let mut count = 1u32;
560                        let mut lookahead = 1;
561                        while self.pos + lookahead < self.source.len()
562                            && count < 3
563                            && self.source[self.pos + lookahead] >= '0'
564                            && self.source[self.pos + lookahead] <= '7'
565                        {
566                            let next =
567                                code * 8 + (self.source[self.pos + lookahead] as u32 - '0' as u32);
568                            if next > 255 {
569                                break;
570                            }
571                            code = next;
572                            count += 1;
573                            lookahead += 1;
574                        }
575                        for _ in 1..lookahead {
576                            self.advance();
577                        }
578                        if let Some(ch) = char::from_u32(code) {
579                            value.push(ch);
580                        }
581                    }
582                }
583                '\\' => value.push('\\'),
584                '\'' => value.push('\''),
585                '"' => value.push('"'),
586                '`' => value.push('`'),
587                'x' => {
588                    if self.pos + 2 < self.source.len() {
589                        let h1 = self.source[self.pos + 1];
590                        let h2 = self.source[self.pos + 2];
591                        if let (Some(a), Some(b)) = (h1.to_digit(16), h2.to_digit(16)) {
592                            let code = (a << 4) | b;
593                            if let Some(c) = char::from_u32(code) {
594                                value.push(c);
595                            }
596                            self.advance();
597                            self.advance();
598                        } else {
599                            value.push('x');
600                        }
601                    } else {
602                        value.push('x');
603                    }
604                }
605                'u' => {
606                    if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '{' {
607                        let mut code: u32 = 0;
608                        let mut i = self.pos + 2;
609                        while i < self.source.len() && self.source[i] != '}' {
610                            if let Some(d) = self.source[i].to_digit(16) {
611                                code = code.wrapping_mul(16).wrapping_add(d);
612                                i += 1;
613                            } else {
614                                break;
615                            }
616                        }
617                        if i < self.source.len() && self.source[i] == '}' {
618                            if let Some(decoded) = char::from_u32(code) {
619                                value.push(decoded);
620                            }
621
622                            let steps = i - self.pos;
623                            for _ in 0..steps {
624                                self.advance();
625                            }
626                        } else {
627                            value.push('u');
628                        }
629                    } else if self.pos + 4 < self.source.len() {
630                        let h1 = self.source[self.pos + 1];
631                        let h2 = self.source[self.pos + 2];
632                        let h3 = self.source[self.pos + 3];
633                        let h4 = self.source[self.pos + 4];
634                        if let (Some(a), Some(b), Some(c), Some(d)) = (
635                            h1.to_digit(16),
636                            h2.to_digit(16),
637                            h3.to_digit(16),
638                            h4.to_digit(16),
639                        ) {
640                            let code = (a << 12) | (b << 8) | (c << 4) | d;
641                            if let Some(decoded) = char::from_u32(code) {
642                                value.push(decoded);
643                            }
644                            self.advance();
645                            self.advance();
646                            self.advance();
647                            self.advance();
648                        } else {
649                            value.push('u');
650                        }
651                    } else {
652                        value.push('u');
653                    }
654                }
655                _ => value.push(esc),
656            }
657            self.advance();
658        }
659        if self.pos < self.source.len() {
660            self.advance();
661        }
662        Token {
663            token_type: TokenType::String,
664            value,
665            line: self.line,
666            column: self.column,
667        }
668    }
669
670    fn read_hashbang(&mut self) -> Token {
671        let start = self.pos;
672
673        self.pos += 2;
674
675        while self.pos < self.source.len() {
676            let c = self.source[self.pos];
677            if c == '\n' || c == '\r' {
678                break;
679            }
680            self.pos += 1;
681        }
682        let value: String = self.source[start..self.pos].iter().collect();
683        Token {
684            token_type: TokenType::Hashbang,
685            value,
686            line: self.line,
687            column: self.column,
688        }
689    }
690
691    fn read_identifier(&mut self) -> Token {
692        let start = self.pos;
693        let mut value = String::new();
694        while self.pos < self.source.len() {
695            let c = self.source[self.pos];
696            if Self::is_identifier_part(c) {
697                value.push(c);
698                self.advance();
699            } else if c == '\\' {
700                if let Some(ch) = self.read_identifier_escape() {
701                    value.push(ch);
702                } else {
703                    break;
704                }
705            } else {
706                break;
707            }
708        }
709        if value.is_empty() {
710            value = self.source[start..self.pos].iter().collect();
711        }
712        let token_type = if Self::is_keyword(&value) {
713            TokenType::Keyword
714        } else {
715            TokenType::Identifier
716        };
717        Token {
718            token_type,
719            value,
720            line: self.line,
721            column: self.column,
722        }
723    }
724
725    fn read_identifier_escape(&mut self) -> Option<char> {
726        if self.pos >= self.source.len() || self.source[self.pos] != '\\' {
727            return None;
728        }
729        self.advance();
730        if self.pos >= self.source.len() || self.source[self.pos] != 'u' {
731            return None;
732        }
733        self.advance();
734
735        if self.pos < self.source.len() && self.source[self.pos] == '{' {
736            self.advance();
737            let mut hex = String::new();
738            while self.pos < self.source.len() && self.source[self.pos] != '}' {
739                let c = self.source[self.pos];
740                if !c.is_ascii_hexdigit() {
741                    return None;
742                }
743                hex.push(c);
744                self.advance();
745            }
746            if self.pos >= self.source.len() || self.source[self.pos] != '}' {
747                return None;
748            }
749            self.advance();
750            let code = u32::from_str_radix(&hex, 16).ok()?;
751            return char::from_u32(code);
752        }
753
754        if self.pos + 3 >= self.source.len() {
755            return None;
756        }
757        let mut code: u32 = 0;
758        for _ in 0..4 {
759            let c = self.source[self.pos];
760            let d = c.to_digit(16)?;
761            code = (code << 4) | d;
762            self.advance();
763        }
764        char::from_u32(code)
765    }
766
767    fn read_private_identifier(&mut self) -> Token {
768        self.advance();
769        let mut value = String::from("#");
770        while self.pos < self.source.len() {
771            let c = self.source[self.pos];
772            if Self::is_identifier_part(c) {
773                value.push(c);
774                self.advance();
775            } else if c == '\\' {
776                if let Some(ch) = self.read_identifier_escape() {
777                    value.push(ch);
778                } else {
779                    break;
780                }
781            } else {
782                break;
783            }
784        }
785        Token {
786            token_type: TokenType::PrivateIdentifier,
787            value,
788            line: self.line,
789            column: self.column,
790        }
791    }
792
793    fn read_comment_or_regex(&mut self) -> Token {
794        self.advance();
795        if self.pos < self.source.len() {
796            if self.source[self.pos] == '/' {
797                while self.pos < self.source.len()
798                    && self.source[self.pos] != '\n'
799                    && self.source[self.pos] != '\r'
800                    && self.source[self.pos] != '\u{2028}'
801                    && self.source[self.pos] != '\u{2029}'
802                {
803                    self.advance();
804                }
805                return self.next_token().unwrap_or(Token {
806                    token_type: TokenType::Eof,
807                    value: String::new(),
808                    line: self.line,
809                    column: self.column,
810                });
811            }
812
813            if self.source[self.pos] == '*' {
814                self.advance();
815                while self.pos < self.source.len() {
816                    if self.source[self.pos] == '*'
817                        && self.pos + 1 < self.source.len()
818                        && self.source[self.pos + 1] == '/'
819                    {
820                        self.advance();
821                        self.advance();
822                        break;
823                    }
824                    self.advance();
825                }
826                return self.next_token().unwrap_or(Token {
827                    token_type: TokenType::Eof,
828                    value: String::new(),
829                    line: self.line,
830                    column: self.column,
831                });
832            }
833
834            let next_char = self.source[self.pos];
835            if self.last_token_kind == LastTokenKind::Dividend {
836                if next_char == '=' {
837                    self.advance();
838                    self.last_token_kind = LastTokenKind::Dividend;
839                    return Token {
840                        token_type: TokenType::Punctuator,
841                        value: "/=".to_string(),
842                        line: self.line,
843                        column: self.column,
844                    };
845                }
846
847                self.last_token_kind = LastTokenKind::RegexPrefix;
848                return Token {
849                    token_type: TokenType::Punctuator,
850                    value: "/".to_string(),
851                    line: self.line,
852                    column: self.column,
853                };
854            }
855
856            let mut pattern = String::new();
857            let mut flags = String::new();
858
859            while self.pos < self.source.len() {
860                let c = self.source[self.pos];
861                if c == '/' {
862                    self.advance();
863                    break;
864                } else if c == '\\' {
865                    pattern.push(c);
866                    self.advance();
867                    if self.pos < self.source.len() {
868                        pattern.push(self.source[self.pos]);
869                        self.advance();
870                    }
871                } else if c == '[' {
872                    pattern.push(c);
873                    self.advance();
874                    while self.pos < self.source.len() {
875                        let cc = self.source[self.pos];
876                        pattern.push(cc);
877                        self.advance();
878                        if cc == ']' {
879                            break;
880                        }
881                    }
882                } else if c == '\n' || c == '\r' {
883                    break;
884                } else {
885                    pattern.push(c);
886                    self.advance();
887                }
888            }
889
890            while self.pos < self.source.len() {
891                let c = self.source[self.pos];
892                if c.is_ascii_alphabetic() {
893                    flags.push(c);
894                    self.advance();
895                } else {
896                    break;
897                }
898            }
899
900            self.last_token_kind = LastTokenKind::Dividend;
901
902            return Token {
903                token_type: TokenType::Regex,
904                value: format!("{}/{}", pattern, flags),
905                line: self.line,
906                column: self.column,
907            };
908        }
909        self.last_token_kind = LastTokenKind::RegexPrefix;
910        Token {
911            token_type: TokenType::Punctuator,
912            value: "/".to_string(),
913            line: self.line,
914            column: self.column,
915        }
916    }
917
918    fn read_punctuator(&mut self) -> Token {
919        let c = self.source[self.pos];
920        self.advance();
921
922        let value: String = if self.pos < self.source.len() {
923            let next = self.source[self.pos];
924            match c {
925                '<' if next == '<' => {
926                    self.advance();
927                    if self.pos < self.source.len() && self.source[self.pos] == '=' {
928                        self.advance();
929                        "<<=".to_string()
930                    } else {
931                        "<<".to_string()
932                    }
933                }
934                '<' if next == '=' => {
935                    self.advance();
936                    "<=".to_string()
937                }
938                '>' if next == '>' => {
939                    self.advance();
940                    if self.pos < self.source.len() && self.source[self.pos] == '>' {
941                        self.advance();
942                        if self.pos < self.source.len() && self.source[self.pos] == '=' {
943                            self.advance();
944                            ">>>=".to_string()
945                        } else {
946                            ">>>".to_string()
947                        }
948                    } else if self.pos < self.source.len() && self.source[self.pos] == '=' {
949                        self.advance();
950                        ">>=".to_string()
951                    } else {
952                        ">>".to_string()
953                    }
954                }
955                '>' if next == '=' => {
956                    self.advance();
957                    ">=".to_string()
958                }
959                '=' if next == '>' => {
960                    self.advance();
961                    "=>".to_string()
962                }
963                '.' if next == '.' => {
964                    self.advance();
965                    if self.pos < self.source.len() && self.source[self.pos] == '.' {
966                        self.advance();
967                        "...".to_string()
968                    } else {
969                        "..".to_string()
970                    }
971                }
972                '=' if next == '=' => {
973                    self.advance();
974                    if self.pos < self.source.len() && self.source[self.pos] == '=' {
975                        self.advance();
976                        "===".to_string()
977                    } else {
978                        "==".to_string()
979                    }
980                }
981                '!' if next == '=' => {
982                    self.advance();
983                    if self.pos < self.source.len() && self.source[self.pos] == '=' {
984                        self.advance();
985                        "!==".to_string()
986                    } else {
987                        "!=".to_string()
988                    }
989                }
990                '*' if next == '=' => {
991                    self.advance();
992                    "*=".to_string()
993                }
994                '/' if next == '=' => {
995                    self.advance();
996                    "/=".to_string()
997                }
998                '%' if next == '=' => {
999                    self.advance();
1000                    "%=".to_string()
1001                }
1002                '+' if next == '=' => {
1003                    self.advance();
1004                    "+=".to_string()
1005                }
1006                '-' if next == '=' => {
1007                    self.advance();
1008                    "-=".to_string()
1009                }
1010                '&' if next == '=' => {
1011                    self.advance();
1012                    "&=".to_string()
1013                }
1014                '|' if next == '=' => {
1015                    self.advance();
1016                    "|=".to_string()
1017                }
1018                '^' if next == '=' => {
1019                    self.advance();
1020                    "^=".to_string()
1021                }
1022                '<' if next == '=' => {
1023                    self.advance();
1024                    "<=".to_string()
1025                }
1026                '>' if next == '=' => {
1027                    self.advance();
1028                    ">=".to_string()
1029                }
1030                '&' if next == '&' => {
1031                    self.advance();
1032                    if self.pos < self.source.len() && self.source[self.pos] == '=' {
1033                        self.advance();
1034                        "&&=".to_string()
1035                    } else {
1036                        "&&".to_string()
1037                    }
1038                }
1039                '|' if next == '|' => {
1040                    self.advance();
1041                    if self.pos < self.source.len() && self.source[self.pos] == '=' {
1042                        self.advance();
1043                        "||=".to_string()
1044                    } else {
1045                        "||".to_string()
1046                    }
1047                }
1048                '+' if next == '+' => {
1049                    self.advance();
1050                    "++".to_string()
1051                }
1052                '-' if next == '-' => {
1053                    self.advance();
1054                    "--".to_string()
1055                }
1056                '?' if next == '?' => {
1057                    self.advance();
1058
1059                    if self.pos < self.source.len() && self.source[self.pos] == '=' {
1060                        self.advance();
1061                        "??=".to_string()
1062                    } else {
1063                        "??".to_string()
1064                    }
1065                }
1066                '?' if next == '.' => {
1067                    self.advance();
1068                    "?.".to_string()
1069                }
1070                '*' if next == '*' => {
1071                    self.advance();
1072                    if self.pos < self.source.len() && self.source[self.pos] == '=' {
1073                        self.advance();
1074                        "**=".to_string()
1075                    } else {
1076                        "**".to_string()
1077                    }
1078                }
1079                _ => c.to_string(),
1080            }
1081        } else {
1082            c.to_string()
1083        };
1084
1085        Token {
1086            token_type: TokenType::Punctuator,
1087            value,
1088            line: self.line,
1089            column: self.column,
1090        }
1091    }
1092
1093    fn is_identifier_start(c: char) -> bool {
1094        if c == '$' || c == '_' {
1095            return true;
1096        }
1097        if c.is_ascii() {
1098            return c.is_ascii_alphabetic();
1099        }
1100        crate::builtins::unicode_data::XID_START.contains(c as u32)
1101            || matches!(c, '\u{2118}' | '\u{212E}' | '\u{309B}' | '\u{309C}')
1102    }
1103
1104    fn is_identifier_part(c: char) -> bool {
1105        if c == '$' || c == '_' {
1106            return true;
1107        }
1108        if c.is_ascii() {
1109            return c.is_ascii_alphanumeric();
1110        }
1111        if c == '\u{200C}' || c == '\u{200D}' {
1112            return true;
1113        }
1114        Self::is_identifier_start(c)
1115            || crate::builtins::unicode_data::XID_CONTINUE.contains(c as u32)
1116    }
1117
1118    fn is_keyword(s: &str) -> bool {
1119        matches!(
1120            s,
1121            "break"
1122                | "case"
1123                | "catch"
1124                | "class"
1125                | "const"
1126                | "continue"
1127                | "debugger"
1128                | "default"
1129                | "delete"
1130                | "do"
1131                | "else"
1132                | "export"
1133                | "extends"
1134                | "finally"
1135                | "for"
1136                | "function"
1137                | "if"
1138                | "import"
1139                | "in"
1140                | "instanceof"
1141                | "let"
1142                | "new"
1143                | "return"
1144                | "super"
1145                | "switch"
1146                | "this"
1147                | "throw"
1148                | "try"
1149                | "typeof"
1150                | "var"
1151                | "void"
1152                | "while"
1153                | "with"
1154                | "yield"
1155                | "async"
1156                | "await"
1157                | "static"
1158                | "get"
1159                | "set"
1160                | "true"
1161                | "false"
1162                | "null"
1163                | "from"
1164                | "as"
1165                | "of"
1166        )
1167    }
1168
1169    pub fn read_template_chars(&mut self) -> Option<String> {
1170        let mut result = String::new();
1171        while self.pos < self.source.len() {
1172            let c = self.source[self.pos];
1173
1174            if c == '$' && self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '{' {
1175                break;
1176            }
1177
1178            if c == '`' {
1179                break;
1180            }
1181            result.push(c);
1182            self.advance();
1183        }
1184        if result.is_empty() {
1185            None
1186        } else {
1187            Some(result)
1188        }
1189    }
1190
    /// Scans one segment of template-literal text starting at the current
    /// position and returns its decoded value.
    ///
    /// Scanning stops at the first of:
    /// * `${` - both characters are consumed and `(value, true)` is returned;
    /// * a backtick - it is consumed and `(value, false)` is returned;
    /// * end of input - `(value, false)` is returned.
    ///
    /// Backslash escapes are decoded while scanning, and raw line terminators
    /// (`\n`, `\r`, `\r\n`) are each appended as a single `'\n'`.
    ///
    /// NOTE(review): an unterminated template is indistinguishable from one
    /// closed by a backtick, because both return `false`.
    fn scan_template_segment(&mut self) -> (String, bool) {
        let mut value = String::new();
        while self.pos < self.source.len() {
            let c = self.source[self.pos];

            // Start of an interpolation: consume `$` and `{`.
            if c == '$' && self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '{' {
                self.advance();
                self.advance();
                return (value, true);
            }

            // Closing backtick ends the template.
            if c == '`' {
                self.advance();
                return (value, false);
            }

            if c == '\\' {
                // Consume the backslash; `esc` is the character after it.
                self.advance();
                if self.pos >= self.source.len() {
                    // A trailing backslash at end of input contributes nothing.
                    break;
                }
                let esc = self.source[self.pos];
                // Unless an arm `continue`s, `esc` itself is consumed by the
                // single `self.advance()` that follows this match; arms only
                // consume the characters *beyond* `esc`.
                match esc {
                    'n' => value.push('\n'),
                    'r' => value.push('\r'),
                    't' => value.push('\t'),
                    'b' => value.push('\x08'),
                    'f' => value.push('\x0c'),
                    'v' => value.push('\x0b'),
                    // NOTE(review): the character after `\0` is not inspected.
                    '0' => value.push('\0'),
                    '\\' => value.push('\\'),
                    '\'' => value.push('\''),
                    '"' => value.push('"'),
                    '`' => value.push('`'),
                    '$' => value.push('$'),
                    // Line continuation: backslash + newline adds nothing to
                    // the value. Position and line/column are updated by hand.
                    '\n' => {
                        self.line += 1;
                        self.column = 1;
                        self.pos += 1;
                        continue;
                    }
                    // Line continuation with `\r` or `\r\n`.
                    '\r' => {
                        self.line += 1;
                        self.column = 1;
                        self.pos += 1;

                        if self.pos < self.source.len() && self.source[self.pos] == '\n' {
                            self.pos += 1;
                        }
                        continue;
                    }
                    // `\xHH`: exactly two hex digits.
                    'x' => {
                        if self.pos + 2 < self.source.len() {
                            let h1 = self.source[self.pos + 1];
                            let h2 = self.source[self.pos + 2];
                            if let (Some(a), Some(b)) = (h1.to_digit(16), h2.to_digit(16)) {
                                let code = (a << 4) | b;
                                if let Some(ch) = char::from_u32(code) {
                                    value.push(ch);
                                }
                                // Consume the two hex digits.
                                self.advance();
                                self.advance();
                            } else {
                                // Not a valid hex pair: keep a literal `x`.
                                value.push('x');
                            }
                        } else {
                            // Fewer than two characters remain: keep a literal `x`.
                            value.push('x');
                        }
                    }
                    'u' => {
                        // `\u{H...}`: any run of characters up to `}`.
                        if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '{' {
                            // Skip `u` and `{`; the closing `}` (when present)
                            // is consumed by the advance after the match.
                            self.advance();
                            self.advance();
                            let mut hex = String::new();
                            while self.pos < self.source.len() && self.source[self.pos] != '}' {
                                hex.push(self.source[self.pos]);
                                self.advance();
                            }

                            // Text that is not valid hex, or a value that is not
                            // a valid `char`, is dropped without adding anything.
                            if let Ok(code) = u32::from_str_radix(&hex, 16) {
                                if let Some(ch) = char::from_u32(code) {
                                    value.push(ch);
                                }
                            }
                        // `\uHHHH`: exactly four hex digits.
                        } else if self.pos + 4 < self.source.len() {
                            let h1 = self.source[self.pos + 1];
                            let h2 = self.source[self.pos + 2];
                            let h3 = self.source[self.pos + 3];
                            let h4 = self.source[self.pos + 4];
                            if let (Some(a), Some(b), Some(c), Some(d)) = (
                                h1.to_digit(16),
                                h2.to_digit(16),
                                h3.to_digit(16),
                                h4.to_digit(16),
                            ) {
                                let code = (a << 12) | (b << 8) | (c << 4) | d;
                                // Values `char::from_u32` rejects (such as
                                // surrogates) are dropped without adding anything.
                                if let Some(decoded) = char::from_u32(code) {
                                    value.push(decoded);
                                }
                                // Consume the four hex digits.
                                self.advance();
                                self.advance();
                                self.advance();
                                self.advance();
                            } else {
                                // Not four hex digits: keep a literal `u`.
                                value.push('u');
                            }
                        } else {
                            // Fewer than four characters remain: keep a literal `u`.
                            value.push('u');
                        }
                    }
                    // Any other escaped character stands for itself.
                    _ => value.push(esc),
                }
                // Consume `esc` (or the `}` that closes `\u{...}`).
                self.advance();
                continue;
            }

            // Raw newline: appended as-is, with line/column updated by hand
            // instead of going through `advance()`.
            if c == '\n' {
                value.push('\n');
                self.pos += 1;
                self.line += 1;
                self.column = 1;
                continue;
            }
            // Raw `\r` or `\r\n`: appended as a single `'\n'`.
            if c == '\r' {
                value.push('\n');
                self.pos += 1;
                self.line += 1;
                self.column = 1;
                if self.pos < self.source.len() && self.source[self.pos] == '\n' {
                    self.pos += 1;
                }
                continue;
            }

            // Ordinary character.
            value.push(c);
            self.advance();
        }

        // Reached end of input without a closing backtick or `${`.
        (value, false)
    }
1331
1332    pub fn scan_template_continuation(&mut self) -> Option<Token> {
1333        let (value, terminated_by_interp) = self.scan_template_segment();
1334        let token_type = if terminated_by_interp {
1335            TokenType::TemplateMiddle
1336        } else {
1337            TokenType::TemplateTail
1338        };
1339        self.last_token_kind = LastTokenKind::Dividend;
1340        Some(Token {
1341            token_type,
1342            value,
1343            line: self.line,
1344            column: self.column,
1345        })
1346    }
1347
1348    pub fn source_from_pos(&self) -> String {
1349        self.source[self.pos..].iter().collect()
1350    }
1351
1352    pub fn advance_char(&mut self) -> Option<char> {
1353        if self.pos < self.source.len() {
1354            let c = self.source[self.pos];
1355            self.advance();
1356            Some(c)
1357        } else {
1358            None
1359        }
1360    }
1361
1362    pub fn at_str(&self, s: &str) -> bool {
1363        let chars: Vec<char> = s.chars().collect();
1364        if self.pos + chars.len() > self.source.len() {
1365            return false;
1366        }
1367        for (i, c) in chars.iter().enumerate() {
1368            if self.source[self.pos + i] != *c {
1369                return false;
1370            }
1371        }
1372        true
1373    }
1374}